diff --git a/.clang-format b/.clang-format index e6080f5834a3..bc2ffb2a0b53 100644 --- a/.clang-format +++ b/.clang-format @@ -72,6 +72,10 @@ ForEachMacros: - 'apei_estatus_for_each_section' - 'ata_for_each_dev' - 'ata_for_each_link' + - '__ata_qc_for_each' + - 'ata_qc_for_each' + - 'ata_qc_for_each_raw' + - 'ata_qc_for_each_with_internal' - 'ax25_for_each' - 'ax25_uid_for_each' - 'bio_for_each_integrity_vec' @@ -85,6 +89,7 @@ ForEachMacros: - 'blk_queue_for_each_rl' - 'bond_for_each_slave' - 'bond_for_each_slave_rcu' + - 'bpf_for_each_spilled_reg' - 'btree_for_each_safe128' - 'btree_for_each_safe32' - 'btree_for_each_safe64' @@ -103,6 +108,8 @@ ForEachMacros: - 'drm_atomic_crtc_for_each_plane' - 'drm_atomic_crtc_state_for_each_plane' - 'drm_atomic_crtc_state_for_each_plane_state' + - 'drm_atomic_for_each_plane_damage' + - 'drm_connector_for_each_possible_encoder' - 'drm_for_each_connector_iter' - 'drm_for_each_crtc' - 'drm_for_each_encoder' @@ -121,11 +128,21 @@ ForEachMacros: - 'for_each_bio' - 'for_each_board_func_rsrc' - 'for_each_bvec' + - 'for_each_card_components' + - 'for_each_card_links' + - 'for_each_card_links_safe' + - 'for_each_card_prelinks' + - 'for_each_card_rtds' + - 'for_each_card_rtds_safe' + - 'for_each_cgroup_storage_type' - 'for_each_child_of_node' - 'for_each_clear_bit' - 'for_each_clear_bit_from' - 'for_each_cmsghdr' - 'for_each_compatible_node' + - 'for_each_component_dais' + - 'for_each_component_dais_safe' + - 'for_each_comp_order' - 'for_each_console' - 'for_each_cpu' - 'for_each_cpu_and' @@ -133,6 +150,10 @@ ForEachMacros: - 'for_each_cpu_wrap' - 'for_each_dev_addr' - 'for_each_dma_cap_mask' + - 'for_each_dpcm_be' + - 'for_each_dpcm_be_rollback' + - 'for_each_dpcm_be_safe' + - 'for_each_dpcm_fe' - 'for_each_drhd_unit' - 'for_each_dss_dev' - 'for_each_efi_memory_desc' @@ -149,6 +170,7 @@ ForEachMacros: - 'for_each_iommu' - 'for_each_ip_tunnel_rcu' - 'for_each_irq_nr' + - 'for_each_link_codecs' - 'for_each_lru' - 'for_each_matching_node' - 'for_each_matching_node_and_match' @@ -160,6 +182,7 @@ ForEachMacros: - 'for_each_mem_range_rev' - 'for_each_migratetype_order' - 'for_each_msi_entry' + - 'for_each_msi_entry_safe' - 'for_each_net' - 'for_each_netdev' - 'for_each_netdev_continue' @@ -183,12 +206,14 @@ ForEachMacros: - 'for_each_node_with_property' - 'for_each_of_allnodes' - 'for_each_of_allnodes_from' + - 'for_each_of_cpu_node' - 'for_each_of_pci_range' - 'for_each_old_connector_in_state' - 'for_each_old_crtc_in_state' - 'for_each_oldnew_connector_in_state' - 'for_each_oldnew_crtc_in_state' - 'for_each_oldnew_plane_in_state' + - 'for_each_oldnew_plane_in_state_reverse' - 'for_each_oldnew_private_obj_in_state' - 'for_each_old_plane_in_state' - 'for_each_old_private_obj_in_state' @@ -206,14 +231,17 @@ ForEachMacros: - 'for_each_process' - 'for_each_process_thread' - 'for_each_property_of_node' + - 'for_each_registered_fb' - 'for_each_reserved_mem_region' - - 'for_each_resv_unavail_range' + - 'for_each_rtd_codec_dai' + - 'for_each_rtd_codec_dai_rollback' - 'for_each_rtdcom' - 'for_each_rtdcom_safe' - 'for_each_set_bit' - 'for_each_set_bit_from' - 'for_each_sg' - 'for_each_sg_page' + - 'for_each_sibling_event' - '__for_each_thread' - 'for_each_thread' - 'for_each_zone' @@ -251,6 +279,8 @@ ForEachMacros: - 'hlist_nulls_for_each_entry_from' - 'hlist_nulls_for_each_entry_rcu' - 'hlist_nulls_for_each_entry_safe' + - 'i3c_bus_for_each_i2cdev' + - 'i3c_bus_for_each_i3cdev' - 'ide_host_for_each_port' - 'ide_port_for_each_dev' - 'ide_port_for_each_present_dev' @@ -267,11 +297,14 @@ ForEachMacros: - 'kvm_for_each_memslot' - 'kvm_for_each_vcpu' - 'list_for_each' + - 'list_for_each_codec' + - 'list_for_each_codec_safe' - 'list_for_each_entry' - 'list_for_each_entry_continue' - 'list_for_each_entry_continue_rcu' - 'list_for_each_entry_continue_reverse' - 'list_for_each_entry_from' + - 'list_for_each_entry_from_rcu' - 'list_for_each_entry_from_reverse' - 'list_for_each_entry_lockless' - 'list_for_each_entry_rcu' @@ -291,6 +324,7 @@ ForEachMacros: - 'media_device_for_each_intf' - 'media_device_for_each_link' - 'media_device_for_each_pad' + - 'nanddev_io_for_each_page' - 'netdev_for_each_lower_dev' - 'netdev_for_each_lower_private' - 'netdev_for_each_lower_private_rcu' @@ -357,12 +391,14 @@ ForEachMacros: - 'sk_nulls_for_each' - 'sk_nulls_for_each_from' - 'sk_nulls_for_each_rcu' + - 'snd_array_for_each' - 'snd_pcm_group_for_each_entry' - 'snd_soc_dapm_widget_for_each_path' - 'snd_soc_dapm_widget_for_each_path_safe' - 'snd_soc_dapm_widget_for_each_sink_path' - 'snd_soc_dapm_widget_for_each_source_path' - 'tb_property_for_each' + - 'tcf_exts_for_each_action' - 'udp_portaddr_for_each_entry' - 'udp_portaddr_for_each_entry_rcu' - 'usb_hub_for_each_child' @@ -371,6 +407,11 @@ ForEachMacros: - 'v4l2_m2m_for_each_dst_buf_safe' - 'v4l2_m2m_for_each_src_buf' - 'v4l2_m2m_for_each_src_buf_safe' + - 'virtio_device_for_each_vq' + - 'xa_for_each' + - 'xas_for_each' + - 'xas_for_each_conflict' + - 'xas_for_each_marked' - 'zorro_for_each_dev' #IncludeBlocks: Preserve # Unknown to clang-format-5.0 diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst new file mode 100644 index 000000000000..1d434c3a268d --- /dev/null +++ b/Documentation/bpf/btf.rst @@ -0,0 +1,870 @@ +===================== +BPF Type Format (BTF) +===================== + +1. Introduction +*************** + +BTF (BPF Type Format) is the meta data format which +encodes the debug info related to BPF program/map. +The name BTF was used initially to describe +data types. The BTF was later extended to include +function info for defined subroutines, and line info +for source/line information. + +The debug info is used for map pretty print, function +signature, etc. The function signature enables better +bpf program/function kernel symbol. +The line info helps generate +source annotated translated byte code, jited code +and verifier log. + +The BTF specification contains two parts, + * BTF kernel API + * BTF ELF file format + +The kernel API is the contract between +user space and kernel. The kernel verifies +the BTF info before using it. +The ELF file format is a user space contract +between ELF file and libbpf loader. + +The type and string sections are part of the +BTF kernel API, describing the debug info +(mostly types related) referenced by the bpf program. +These two sections are discussed in +details in :ref:`BTF_Type_String`. + +.. _BTF_Type_String: + +2. BTF Type and String Encoding +******************************* + +The file ``include/uapi/linux/btf.h`` provides high +level definition on how types/strings are encoded. + +The beginning of data blob must be:: + + struct btf_header { + __u16 magic; + __u8 version; + __u8 flags; + __u32 hdr_len; + + /* All offsets are in bytes relative to the end of this header */ + __u32 type_off; /* offset of type section */ + __u32 type_len; /* length of type section */ + __u32 str_off; /* offset of string section */ + __u32 str_len; /* length of string section */ + }; + +The magic is ``0xeB9F``, which has different encoding for big and little +endian system, and can be used to test whether BTF is generated for +big or little endian target. +The btf_header is designed to be extensible with hdr_len equal to +``sizeof(struct btf_header)`` when the data blob is generated. + +2.1 String Encoding +=================== + +The first string in the string section must be a null string. +The rest of string table is a concatenation of other null-treminated +strings. + +2.2 Type Encoding +================= + +The type id ``0`` is reserved for ``void`` type. +The type section is parsed sequentially and the type id is assigned to +each recognized type starting from id ``1``. +Currently, the following types are supported:: + + #define BTF_KIND_INT 1 /* Integer */ + #define BTF_KIND_PTR 2 /* Pointer */ + #define BTF_KIND_ARRAY 3 /* Array */ + #define BTF_KIND_STRUCT 4 /* Struct */ + #define BTF_KIND_UNION 5 /* Union */ + #define BTF_KIND_ENUM 6 /* Enumeration */ + #define BTF_KIND_FWD 7 /* Forward */ + #define BTF_KIND_TYPEDEF 8 /* Typedef */ + #define BTF_KIND_VOLATILE 9 /* Volatile */ + #define BTF_KIND_CONST 10 /* Const */ + #define BTF_KIND_RESTRICT 11 /* Restrict */ + #define BTF_KIND_FUNC 12 /* Function */ + #define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ + +Note that the type section encodes debug info, not just pure types. +``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram. + +Each type contains the following common data:: + + struct btf_type { + __u32 name_off; + /* "info" bits arrangement + * bits 0-15: vlen (e.g. # of struct's members) + * bits 16-23: unused + * bits 24-27: kind (e.g. int, ptr, array...etc) + * bits 28-30: unused + * bit 31: kind_flag, currently used by + * struct, union and fwd + */ + __u32 info; + /* "size" is used by INT, ENUM, STRUCT and UNION. + * "size" tells the size of the type it is describing. + * + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, + * FUNC and FUNC_PROTO. + * "type" is a type_id referring to another type. + */ + union { + __u32 size; + __u32 type; + }; + }; + +For certain kinds, the common data are followed by kind specific data. +The ``name_off`` in ``struct btf_type`` specifies the offset in the string table. +The following details encoding of each kind. + +2.2.1 BTF_KIND_INT +~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: any valid offset + * ``info.kind_flag``: 0 + * ``info.kind``: BTF_KIND_INT + * ``info.vlen``: 0 + * ``size``: the size of the int type in bytes. + +``btf_type`` is followed by a ``u32`` with following bits arrangement:: + + #define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) + #define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16) + #define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff) + +The ``BTF_INT_ENCODING`` has the following attributes:: + + #define BTF_INT_SIGNED (1 << 0) + #define BTF_INT_CHAR (1 << 1) + #define BTF_INT_BOOL (1 << 2) + +The ``BTF_INT_ENCODING()`` provides extra information, signness, +char, or bool, for the int type. The char and bool encoding +are mostly useful for pretty print. At most one encoding can +be specified for the int type. + +The ``BTF_INT_BITS()`` specifies the number of actual bits held by +this int type. For example, a 4-bit bitfield encodes +``BTF_INT_BITS()`` equals to 4. The ``btf_type.size * 8`` +must be equal to or greater than ``BTF_INT_BITS()`` for the type. +The maximum value of ``BTF_INT_BITS()`` is 128. + +The ``BTF_INT_OFFSET()`` specifies the starting bit offset to +calculate values for this int. For example, a bitfield struct +member has + + * btf member bit offset 100 from the start of the structure, + * btf member pointing to an int type, + * the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4`` + +Then in the struct memory layout, this member will occupy +``4`` bits starting from bits ``100 + 2 = 102``. + +Alternatively, the bitfield struct member can be the following to +access the same bits as the above: + + * btf member bit offset 102, + * btf member pointing to an int type, + * the int type has ``BTF_INT_OFFSET() = 0`` and ``BTF_INT_BITS() = 4`` + +The original intention of ``BTF_INT_OFFSET()`` is to provide +flexibility of bitfield encoding. +Currently, both llvm and pahole generates ``BTF_INT_OFFSET() = 0`` +for all int types. + +2.2.2 BTF_KIND_PTR +~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: 0 + * ``info.kind_flag``: 0 + * ``info.kind``: BTF_KIND_PTR + * ``info.vlen``: 0 + * ``type``: the pointee type of the pointer + +No additional type data follow ``btf_type``. + +2.2.3 BTF_KIND_ARRAY +~~~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: 0 + * ``info.kind_flag``: 0 + * ``info.kind``: BTF_KIND_ARRAY + * ``info.vlen``: 0 + * ``size/type``: 0, not used + +btf_type is followed by one "struct btf_array":: + + struct btf_array { + __u32 type; + __u32 index_type; + __u32 nelems; + }; + +The ``struct btf_array`` encoding: + * ``type``: the element type + * ``index_type``: the index type + * ``nelems``: the number of elements for this array (``0`` is also allowed). + +The ``index_type`` can be any regular int types +(u8, u16, u32, u64, unsigned __int128). +The original design of including ``index_type`` follows dwarf +which has a ``index_type`` for its array type. +Currently in BTF, beyond type verification, the ``index_type`` is not used. + +The ``struct btf_array`` allows chaining through element type to represent +multiple dimensional arrays. For example, ``int a[5][6]``, the following +type system illustrates the chaining: + + * [1]: int + * [2]: array, ``btf_array.type = [1]``, ``btf_array.nelems = 6`` + * [3]: array, ``btf_array.type = [2]``, ``btf_array.nelems = 5`` + +Currently, both pahole and llvm collapse multiple dimensional array +into one dimensional array, e.g., ``a[5][6]``, the btf_array.nelems +equal to ``30``. This is because the original use case is map pretty +print where the whole array is dumped out so one dimensional array +is enough. As more BTF usage is explored, pahole and llvm can be +changed to generate proper chained representation for +multiple dimensional arrays. + +2.2.4 BTF_KIND_STRUCT +~~~~~~~~~~~~~~~~~~~~~ +2.2.5 BTF_KIND_UNION +~~~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: 0 or offset to a valid C identifier + * ``info.kind_flag``: 0 or 1 + * ``info.kind``: BTF_KIND_STRUCT or BTF_KIND_UNION + * ``info.vlen``: the number of struct/union members + * ``info.size``: the size of the struct/union in bytes + +``btf_type`` is followed by ``info.vlen`` number of ``struct btf_member``.:: + + struct btf_member { + __u32 name_off; + __u32 type; + __u32 offset; + }; + +``struct btf_member`` encoding: + * ``name_off``: offset to a valid C identifier + * ``type``: the member type + * ``offset``: + +If the type info ``kind_flag`` is not set, the offset contains +only bit offset of the member. Note that the base type of the +bitfield can only be int or enum type. If the bitfield size +is 32, the base type can be either int or enum type. +If the bitfield size is not 32, the base type must be int, +and int type ``BTF_INT_BITS()`` encodes the bitfield size. + +If the ``kind_flag`` is set, the ``btf_member.offset`` +contains both member bitfield size and bit offset. The +bitfield size and bit offset are calculated as below.:: + + #define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24) + #define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff) + +In this case, if the base type is an int type, it must +be a regular int type: + + * ``BTF_INT_OFFSET()`` must be 0. + * ``BTF_INT_BITS()`` must be equal to ``{1,2,4,8,16} * 8``. + +The following kernel patch introduced ``kind_flag`` and +explained why both modes exist: + + https://github.com/torvalds/linux/commit/9d5f9f701b1891466fb3dbb1806ad97716f95cc3#diff-fa650a64fdd3968396883d2fe8215ff3 + +2.2.6 BTF_KIND_ENUM +~~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: 0 or offset to a valid C identifier + * ``info.kind_flag``: 0 + * ``info.kind``: BTF_KIND_ENUM + * ``info.vlen``: number of enum values + * ``size``: 4 + +``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum``.:: + + struct btf_enum { + __u32 name_off; + __s32 val; + }; + +The ``btf_enum`` encoding: + * ``name_off``: offset to a valid C identifier + * ``val``: any value + +2.2.7 BTF_KIND_FWD +~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: offset to a valid C identifier + * ``info.kind_flag``: 0 for struct, 1 for union + * ``info.kind``: BTF_KIND_FWD + * ``info.vlen``: 0 + * ``type``: 0 + +No additional type data follow ``btf_type``. + +2.2.8 BTF_KIND_TYPEDEF +~~~~~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: offset to a valid C identifier + * ``info.kind_flag``: 0 + * ``info.kind``: BTF_KIND_TYPEDEF + * ``info.vlen``: 0 + * ``type``: the type which can be referred by name at ``name_off`` + +No additional type data follow ``btf_type``. + +2.2.9 BTF_KIND_VOLATILE +~~~~~~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: 0 + * ``info.kind_flag``: 0 + * ``info.kind``: BTF_KIND_VOLATILE + * ``info.vlen``: 0 + * ``type``: the type with ``volatile`` qualifier + +No additional type data follow ``btf_type``. + +2.2.10 BTF_KIND_CONST +~~~~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: 0 + * ``info.kind_flag``: 0 + * ``info.kind``: BTF_KIND_CONST + * ``info.vlen``: 0 + * ``type``: the type with ``const`` qualifier + +No additional type data follow ``btf_type``. + +2.2.11 BTF_KIND_RESTRICT +~~~~~~~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: 0 + * ``info.kind_flag``: 0 + * ``info.kind``: BTF_KIND_RESTRICT + * ``info.vlen``: 0 + * ``type``: the type with ``restrict`` qualifier + +No additional type data follow ``btf_type``. + +2.2.12 BTF_KIND_FUNC +~~~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: offset to a valid C identifier + * ``info.kind_flag``: 0 + * ``info.kind``: BTF_KIND_FUNC + * ``info.vlen``: 0 + * ``type``: a BTF_KIND_FUNC_PROTO type + +No additional type data follow ``btf_type``. + +A BTF_KIND_FUNC defines, not a type, but a subprogram (function) whose +signature is defined by ``type``. The subprogram is thus an instance of +that type. The BTF_KIND_FUNC may in turn be referenced by a func_info in +the :ref:`BTF_Ext_Section` (ELF) or in the arguments to +:ref:`BPF_Prog_Load` (ABI). + +2.2.13 BTF_KIND_FUNC_PROTO +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``struct btf_type`` encoding requirement: + * ``name_off``: 0 + * ``info.kind_flag``: 0 + * ``info.kind``: BTF_KIND_FUNC_PROTO + * ``info.vlen``: # of parameters + * ``type``: the return type + +``btf_type`` is followed by ``info.vlen`` number of ``struct btf_param``.:: + + struct btf_param { + __u32 name_off; + __u32 type; + }; + +If a BTF_KIND_FUNC_PROTO type is referred by a BTF_KIND_FUNC type, +then ``btf_param.name_off`` must point to a valid C identifier +except for the possible last argument representing the variable +argument. The btf_param.type refers to parameter type. + +If the function has variable arguments, the last parameter +is encoded with ``name_off = 0`` and ``type = 0``. + +3. BTF Kernel API +***************** + +The following bpf syscall command involves BTF: + * BPF_BTF_LOAD: load a blob of BTF data into kernel + * BPF_MAP_CREATE: map creation with btf key and value type info. + * BPF_PROG_LOAD: prog load with btf function and line info. + * BPF_BTF_GET_FD_BY_ID: get a btf fd + * BPF_OBJ_GET_INFO_BY_FD: btf, func_info, line_info + and other btf related info are returned. + +The workflow typically looks like: +:: + + Application: + BPF_BTF_LOAD + | + v + BPF_MAP_CREATE and BPF_PROG_LOAD + | + V + ...... + + Introspection tool: + ...... + BPF_{PROG,MAP}_GET_NEXT_ID (get prog/map id's) + | + V + BPF_{PROG,MAP}_GET_FD_BY_ID (get a prog/map fd) + | + V + BPF_OBJ_GET_INFO_BY_FD (get bpf_prog_info/bpf_map_info with btf_id) + | | + V | + BPF_BTF_GET_FD_BY_ID (get btf_fd) | + | | + V | + BPF_OBJ_GET_INFO_BY_FD (get btf) | + | | + V V + pretty print types, dump func signatures and line info, etc. + + +3.1 BPF_BTF_LOAD +================ + +Load a blob of BTF data into kernel. A blob of data +described in :ref:`BTF_Type_String` +can be directly loaded into the kernel. +A ``btf_fd`` returns to userspace. + +3.2 BPF_MAP_CREATE +================== + +A map can be created with ``btf_fd`` and specified key/value type id.:: + + __u32 btf_fd; /* fd pointing to a BTF type data */ + __u32 btf_key_type_id; /* BTF type_id of the key */ + __u32 btf_value_type_id; /* BTF type_id of the value */ + +In libbpf, the map can be defined with extra annotation like below: +:: + + struct bpf_map_def SEC("maps") btf_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct ipv_counts), + .max_entries = 4, + }; + BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); + +Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, +key and value types for the map. +During ELF parsing, libbpf is able to extract key/value type_id's +and assigned them to BPF_MAP_CREATE attributes automatically. + +.. _BPF_Prog_Load: + +3.3 BPF_PROG_LOAD +================= + +During prog_load, func_info and line_info can be passed to kernel with +proper values for the following attributes: +:: + + __u32 insn_cnt; + __aligned_u64 insns; + ...... + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ + __u32 line_info_rec_size; /* userspace bpf_line_info size */ + __aligned_u64 line_info; /* line info */ + __u32 line_info_cnt; /* number of bpf_line_info records */ + +The func_info and line_info are an array of below, respectively.:: + + struct bpf_func_info { + __u32 insn_off; /* [0, insn_cnt - 1] */ + __u32 type_id; /* pointing to a BTF_KIND_FUNC type */ + }; + struct bpf_line_info { + __u32 insn_off; /* [0, insn_cnt - 1] */ + __u32 file_name_off; /* offset to string table for the filename */ + __u32 line_off; /* offset to string table for the source line */ + __u32 line_col; /* line number and column number */ + }; + +func_info_rec_size is the size of each func_info record, and line_info_rec_size +is the size of each line_info record. Passing the record size to kernel make +it possible to extend the record itself in the future. + +Below are requirements for func_info: + * func_info[0].insn_off must be 0. + * the func_info insn_off is in strictly increasing order and matches + bpf func boundaries. + +Below are requirements for line_info: + * the first insn in each func must points to a line_info record. + * the line_info insn_off is in strictly increasing order. + +For line_info, the line number and column number are defined as below: +:: + + #define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) + #define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) + +3.4 BPF_{PROG,MAP}_GET_NEXT_ID + +In kernel, every loaded program, map or btf has a unique id. +The id won't change during the life time of the program, map or btf. + +The bpf syscall command BPF_{PROG,MAP}_GET_NEXT_ID +returns all id's, one for each command, to user space, for bpf +program or maps, +so the inspection tool can inspect all programs and maps. + +3.5 BPF_{PROG,MAP}_GET_FD_BY_ID + +The introspection tool cannot use id to get details about program or maps. +A file descriptor needs to be obtained first for reference counting purpose. + +3.6 BPF_OBJ_GET_INFO_BY_FD +========================== + +Once a program/map fd is acquired, the introspection tool can +get the detailed information from kernel about this fd, +some of which is btf related. For example, +``bpf_map_info`` returns ``btf_id``, key/value type id. +``bpf_prog_info`` returns ``btf_id``, func_info and line info +for translated bpf byte codes, and jited_line_info. + +3.7 BPF_BTF_GET_FD_BY_ID +======================== + +With ``btf_id`` obtained in ``bpf_map_info`` and ``bpf_prog_info``, +bpf syscall command BPF_BTF_GET_FD_BY_ID can retrieve a btf fd. +Then, with command BPF_OBJ_GET_INFO_BY_FD, the btf blob, originally +loaded into the kernel with BPF_BTF_LOAD, can be retrieved. + +With the btf blob, ``bpf_map_info`` and ``bpf_prog_info``, the introspection +tool has full btf knowledge and is able to pretty print map key/values, +dump func signatures, dump line info along with byte/jit codes. + +4. ELF File Format Interface +**************************** + +4.1 .BTF section +================ + +The .BTF section contains type and string data. The format of this section +is same as the one describe in :ref:`BTF_Type_String`. + +.. _BTF_Ext_Section: + +4.2 .BTF.ext section +==================== + +The .BTF.ext section encodes func_info and line_info which +needs loader manipulation before loading into the kernel. + +The specification for .BTF.ext section is defined at +``tools/lib/bpf/btf.h`` and ``tools/lib/bpf/btf.c``. + +The current header of .BTF.ext section:: + + struct btf_ext_header { + __u16 magic; + __u8 version; + __u8 flags; + __u32 hdr_len; + + /* All offsets are in bytes relative to the end of this header */ + __u32 func_info_off; + __u32 func_info_len; + __u32 line_info_off; + __u32 line_info_len; + }; + +It is very similar to .BTF section. Instead of type/string section, +it contains func_info and line_info section. See :ref:`BPF_Prog_Load` +for details about func_info and line_info record format. + +The func_info is organized as below.:: + + func_info_rec_size + btf_ext_info_sec for section #1 /* func_info for section #1 */ + btf_ext_info_sec for section #2 /* func_info for section #2 */ + ... + +``func_info_rec_size`` specifies the size of ``bpf_func_info`` structure +when .BTF.ext is generated. btf_ext_info_sec, defined below, is +the func_info for each specific ELF section.:: + + struct btf_ext_info_sec { + __u32 sec_name_off; /* offset to section name */ + __u32 num_info; + /* Followed by num_info * record_size number of bytes */ + __u8 data[0]; + }; + +Here, num_info must be greater than 0. + +The line_info is organized as below.:: + + line_info_rec_size + btf_ext_info_sec for section #1 /* line_info for section #1 */ + btf_ext_info_sec for section #2 /* line_info for section #2 */ + ... + +``line_info_rec_size`` specifies the size of ``bpf_line_info`` structure +when .BTF.ext is generated. + +The interpretation of ``bpf_func_info->insn_off`` and +``bpf_line_info->insn_off`` is different between kernel API and ELF API. +For kernel API, the ``insn_off`` is the instruction offset in the unit +of ``struct bpf_insn``. For ELF API, the ``insn_off`` is the byte offset +from the beginning of section (``btf_ext_info_sec->sec_name_off``). + +5. Using BTF +************ + +5.1 bpftool map pretty print +============================ + +With BTF, the map key/value can be printed based on fields rather than +simply raw bytes. This is especially +valuable for large structure or if you data structure +has bitfields. For example, for the following map,:: + + enum A { A1, A2, A3, A4, A5 }; + typedef enum A ___A; + struct tmp_t { + char a1:4; + int a2:4; + int :4; + __u32 a3:4; + int b; + ___A b1:4; + enum A b2:4; + }; + struct bpf_map_def SEC("maps") tmpmap = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tmp_t), + .max_entries = 1, + }; + BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t); + +bpftool is able to pretty print like below: +:: + + [{ + "key": 0, + "value": { + "a1": 0x2, + "a2": 0x4, + "a3": 0x6, + "b": 7, + "b1": 0x8, + "b2": 0xa + } + } + ] + +5.2 bpftool prog dump +===================== + +The following is an example to show func_info and line_info +can help prog dump with better kernel symbol name, function prototype +and line information.:: + + $ bpftool prog dump jited pinned /sys/fs/bpf/test_btf_haskv + [...] + int test_long_fname_2(struct dummy_tracepoint_args * arg): + bpf_prog_44a040bf25481309_test_long_fname_2: + ; static int test_long_fname_2(struct dummy_tracepoint_args *arg) + 0: push %rbp + 1: mov %rsp,%rbp + 4: sub $0x30,%rsp + b: sub $0x28,%rbp + f: mov %rbx,0x0(%rbp) + 13: mov %r13,0x8(%rbp) + 17: mov %r14,0x10(%rbp) + 1b: mov %r15,0x18(%rbp) + 1f: xor %eax,%eax + 21: mov %rax,0x20(%rbp) + 25: xor %esi,%esi + ; int key = 0; + 27: mov %esi,-0x4(%rbp) + ; if (!arg->sock) + 2a: mov 0x8(%rdi),%rdi + ; if (!arg->sock) + 2e: cmp $0x0,%rdi + 32: je 0x0000000000000070 + 34: mov %rbp,%rsi + ; counts = bpf_map_lookup_elem(&btf_map, &key); + [...] + +5.3 verifier log +================ + +The following is an example how line_info can help verifier failure debug.:: + + /* The code at tools/testing/selftests/bpf/test_xdp_noinline.c + * is modified as below. + */ + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + /* + if (data + 4 > data_end) + return XDP_DROP; + */ + *(u32 *)data = dst->dst; + + $ bpftool prog load ./test_xdp_noinline.o /sys/fs/bpf/test_xdp_noinline type xdp + ; data = (void *)(long)xdp->data; + 224: (79) r2 = *(u64 *)(r10 -112) + 225: (61) r2 = *(u32 *)(r2 +0) + ; *(u32 *)data = dst->dst; + 226: (63) *(u32 *)(r2 +0) = r1 + invalid access to packet, off=0 size=4, R2(id=0,off=0,r=0) + R2 offset is outside of the packet + +6. BTF Generation +***************** + +You need latest pahole + + https://git.kernel.org/pub/scm/devel/pahole/pahole.git/ + +or llvm (8.0 or later). The pahole acts as a dwarf2btf converter. It doesn't support .BTF.ext +and btf BTF_KIND_FUNC type yet. For example,:: + + -bash-4.4$ cat t.c + struct t { + int a:2; + int b:3; + int c:2; + } g; + -bash-4.4$ gcc -c -O2 -g t.c + -bash-4.4$ pahole -JV t.o + File t.o: + [1] STRUCT t kind_flag=1 size=4 vlen=3 + a type_id=2 bitfield_size=2 bits_offset=0 + b type_id=2 bitfield_size=3 bits_offset=2 + c type_id=2 bitfield_size=2 bits_offset=5 + [2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED + +The llvm is able to generate .BTF and .BTF.ext directly with -g for bpf target only. +The assembly code (-S) is able to show the BTF encoding in assembly format.:: + + -bash-4.4$ cat t2.c + typedef int __int32; + struct t2 { + int a2; + int (*f2)(char q1, __int32 q2, ...); + int (*f3)(); + } g2; + int main() { return 0; } + int test() { return 0; } + -bash-4.4$ clang -c -g -O2 -target bpf t2.c + -bash-4.4$ readelf -S t2.o + ...... + [ 8] .BTF PROGBITS 0000000000000000 00000247 + 000000000000016e 0000000000000000 0 0 1 + [ 9] .BTF.ext PROGBITS 0000000000000000 000003b5 + 0000000000000060 0000000000000000 0 0 1 + [10] .rel.BTF.ext REL 0000000000000000 000007e0 + 0000000000000040 0000000000000010 16 9 8 + ...... + -bash-4.4$ clang -S -g -O2 -target bpf t2.c + -bash-4.4$ cat t2.s + ...... + .section .BTF,"",@progbits + .short 60319 # 0xeb9f + .byte 1 + .byte 0 + .long 24 + .long 0 + .long 220 + .long 220 + .long 122 + .long 0 # BTF_KIND_FUNC_PROTO(id = 1) + .long 218103808 # 0xd000000 + .long 2 + .long 83 # BTF_KIND_INT(id = 2) + .long 16777216 # 0x1000000 + .long 4 + .long 16777248 # 0x1000020 + ...... + .byte 0 # string offset=0 + .ascii ".text" # string offset=1 + .byte 0 + .ascii "/home/yhs/tmp-pahole/t2.c" # string offset=7 + .byte 0 + .ascii "int main() { return 0; }" # string offset=33 + .byte 0 + .ascii "int test() { return 0; }" # string offset=58 + .byte 0 + .ascii "int" # string offset=83 + ...... + .section .BTF.ext,"",@progbits + .short 60319 # 0xeb9f + .byte 1 + .byte 0 + .long 24 + .long 0 + .long 28 + .long 28 + .long 44 + .long 8 # FuncInfo + .long 1 # FuncInfo section string offset=1 + .long 2 + .long .Lfunc_begin0 + .long 3 + .long .Lfunc_begin1 + .long 5 + .long 16 # LineInfo + .long 1 # LineInfo section string offset=1 + .long 2 + .long .Ltmp0 + .long 7 + .long 33 + .long 7182 # Line 7 Col 14 + .long .Ltmp3 + .long 7 + .long 58 + .long 8206 # Line 8 Col 14 + +7. Testing +********** + +Kernel bpf selftest `test_btf.c` provides extensive set of BTF related tests. diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst index 00a8450a602f..4e77932959cc 100644 --- a/Documentation/bpf/index.rst +++ b/Documentation/bpf/index.rst @@ -15,6 +15,13 @@ that goes into great technical depth about the BPF Architecture. The primary info for the bpf syscall is available in the `man-pages`_ for `bpf(2)`_. +BPF Type Format (BTF) +===================== + +.. toctree:: + :maxdepth: 1 + + btf Frequently asked questions (FAQ) diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index 6a6d67acaf69..5d54b27c6eba 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -108,12 +108,13 @@ some, but not all of the other indices changing. Sometimes you need to ensure that a subsequent call to :c:func:`xa_store` will not need to allocate memory. The :c:func:`xa_reserve` function -will store a reserved entry at the indicated index. Users of the normal -API will see this entry as containing ``NULL``. If you do not need to -use the reserved entry, you can call :c:func:`xa_release` to remove the -unused entry. If another user has stored to the entry in the meantime, -:c:func:`xa_release` will do nothing; if instead you want the entry to -become ``NULL``, you should use :c:func:`xa_erase`. +will store a reserved entry at the indicated index. Users of the +normal API will see this entry as containing ``NULL``. If you do +not need to use the reserved entry, you can call :c:func:`xa_release` +to remove the unused entry. If another user has stored to the entry +in the meantime, :c:func:`xa_release` will do nothing; if instead you +want the entry to become ``NULL``, you should use :c:func:`xa_erase`. +Using :c:func:`xa_insert` on a reserved entry will fail. If all entries in the array are ``NULL``, the :c:func:`xa_empty` function will return ``true``. @@ -183,6 +184,8 @@ Takes xa_lock internally: * :c:func:`xa_store_bh` * :c:func:`xa_store_irq` * :c:func:`xa_insert` + * :c:func:`xa_insert_bh` + * :c:func:`xa_insert_irq` * :c:func:`xa_erase` * :c:func:`xa_erase_bh` * :c:func:`xa_erase_irq` diff --git a/Documentation/devicetree/bindings/arm/cpu-capacity.txt b/Documentation/devicetree/bindings/arm/cpu-capacity.txt index 84262cdb8d29..96fa46cb133c 100644 --- a/Documentation/devicetree/bindings/arm/cpu-capacity.txt +++ b/Documentation/devicetree/bindings/arm/cpu-capacity.txt @@ -235,4 +235,4 @@ cpus { =========================================== [1] ARM Linux Kernel documentation - CPUs bindings - Documentation/devicetree/bindings/arm/cpus.txt + Documentation/devicetree/bindings/arm/cpus.yaml diff --git a/Documentation/devicetree/bindings/arm/idle-states.txt b/Documentation/devicetree/bindings/arm/idle-states.txt index 8f0937db55c5..45730ba60af5 100644 --- a/Documentation/devicetree/bindings/arm/idle-states.txt +++ b/Documentation/devicetree/bindings/arm/idle-states.txt @@ -684,7 +684,7 @@ cpus { =========================================== [1] ARM Linux Kernel documentation - CPUs bindings - Documentation/devicetree/bindings/arm/cpus.txt + Documentation/devicetree/bindings/arm/cpus.yaml [2] ARM Linux Kernel documentation - PSCI bindings Documentation/devicetree/bindings/arm/psci.txt diff --git a/Documentation/devicetree/bindings/arm/sp810.txt b/Documentation/devicetree/bindings/arm/sp810.txt index 1b2ab1ff5587..46652bf65147 100644 --- a/Documentation/devicetree/bindings/arm/sp810.txt +++ b/Documentation/devicetree/bindings/arm/sp810.txt @@ -4,7 +4,7 @@ SP810 System Controller Required properties: - compatible: standard compatible string for a Primecell peripheral, - see Documentation/devicetree/bindings/arm/primecell.txt + see Documentation/devicetree/bindings/arm/primecell.yaml for more details should be: "arm,sp810", "arm,primecell" diff --git a/Documentation/devicetree/bindings/arm/topology.txt b/Documentation/devicetree/bindings/arm/topology.txt index de9eb0486630..b0d80c0fb265 100644 --- a/Documentation/devicetree/bindings/arm/topology.txt +++ b/Documentation/devicetree/bindings/arm/topology.txt @@ -472,4 +472,4 @@ cpus { =============================================================================== [1] ARM Linux kernel documentation - Documentation/devicetree/bindings/arm/cpus.txt + Documentation/devicetree/bindings/arm/cpus.yaml diff --git a/Documentation/devicetree/bindings/clock/marvell,mmp2.txt b/Documentation/devicetree/bindings/clock/marvell,mmp2.txt index af376a01f2b7..23b52dc02266 100644 --- a/Documentation/devicetree/bindings/clock/marvell,mmp2.txt +++ b/Documentation/devicetree/bindings/clock/marvell,mmp2.txt @@ -18,4 +18,4 @@ Required Properties: Each clock is assigned an identifier and client nodes use this identifier to specify the clock which they consume. -All these identifier could be found in . +All these identifiers could be found in . diff --git a/Documentation/devicetree/bindings/display/arm,pl11x.txt b/Documentation/devicetree/bindings/display/arm,pl11x.txt index ef89ab46b2c9..572fa2773ec4 100644 --- a/Documentation/devicetree/bindings/display/arm,pl11x.txt +++ b/Documentation/devicetree/bindings/display/arm,pl11x.txt @@ -1,6 +1,6 @@ * ARM PrimeCell Color LCD Controller PL110/PL111 -See also Documentation/devicetree/bindings/arm/primecell.txt +See also Documentation/devicetree/bindings/arm/primecell.yaml Required properties: diff --git a/Documentation/devicetree/bindings/display/msm/gpu.txt b/Documentation/devicetree/bindings/display/msm/gpu.txt index ac8df3b871f9..f8759145ce1a 100644 --- a/Documentation/devicetree/bindings/display/msm/gpu.txt +++ b/Documentation/devicetree/bindings/display/msm/gpu.txt @@ -27,7 +27,6 @@ Example: reg = <0x04300000 0x20000>; reg-names = "kgsl_3d0_reg_memory"; interrupts = ; - interrupt-names = "kgsl_3d0_irq"; clock-names = "core", "iface", diff --git a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt index 38ca2201e8ae..2e097b57f170 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt @@ -14,8 +14,6 @@ Required properties: "marvell,armada-8k-gpio" should be used for the Armada 7K and 8K SoCs (either from AP or CP), see - Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt - and Documentation/devicetree/bindings/arm/marvell/ap806-system-controller.txt for specific details about the offset property. diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt index b83bb8249074..a3be5298a5eb 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt @@ -78,7 +78,7 @@ Sub-nodes: PPI affinity can be expressed as a single "ppi-partitions" node, containing a set of sub-nodes, each with the following property: - affinity: Should be a list of phandles to CPU nodes (as described in -Documentation/devicetree/bindings/arm/cpus.txt). + Documentation/devicetree/bindings/arm/cpus.yaml). GICv3 has one or more Interrupt Translation Services (ITS) that are used to route Message Signalled Interrupts (MSI) to the CPUs. diff --git a/Documentation/devicetree/bindings/net/dsa/mt7530.txt b/Documentation/devicetree/bindings/net/dsa/mt7530.txt index aa3527f71fdc..47aa205ee0bd 100644 --- a/Documentation/devicetree/bindings/net/dsa/mt7530.txt +++ b/Documentation/devicetree/bindings/net/dsa/mt7530.txt @@ -3,12 +3,16 @@ Mediatek MT7530 Ethernet switch Required properties: -- compatible: Must be compatible = "mediatek,mt7530"; +- compatible: may be compatible = "mediatek,mt7530" + or compatible = "mediatek,mt7621" - #address-cells: Must be 1. - #size-cells: Must be 0. - mediatek,mcm: Boolean; if defined, indicates that either MT7530 is the part on multi-chip module belong to MT7623A has or the remotely standalone chip as the function MT7623N reference board provided for. + +If compatible mediatek,mt7530 is set then the following properties are required + - core-supply: Phandle to the regulator node necessary for the core power. - io-supply: Phandle to the regulator node necessary for the I/O power. See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.txt b/Documentation/devicetree/bindings/net/qcom,ethqos.txt new file mode 100644 index 000000000000..fcf5035810b5 --- /dev/null +++ b/Documentation/devicetree/bindings/net/qcom,ethqos.txt @@ -0,0 +1,64 @@ +Qualcomm Ethernet ETHQOS device + +This documents dwmmac based ethernet device which supports Gigabit +ethernet for version v2.3.0 onwards. + +This device has following properties: + +Required properties: + +- compatible: Should be qcom,qcs404-ethqos" + +- reg: Address and length of the register set for the device + +- reg-names: Should contain register names "stmmaceth", "rgmii" + +- clocks: Should contain phandle to clocks + +- clock-names: Should contain clock names "stmmaceth", "pclk", + "ptp_ref", "rgmii" + +- interrupts: Should contain phandle to interrupts + +- interrupt-names: Should contain interrupt names "macirq", "eth_lpi" + +Rest of the properties are defined in stmmac.txt file in same directory + + +Example: + +ethernet: ethernet@7a80000 { + compatible = "qcom,qcs404-ethqos"; + reg = <0x07a80000 0x10000>, + <0x07a96000 0x100>; + reg-names = "stmmaceth", "rgmii"; + clock-names = "stmmaceth", "pclk", "ptp_ref", "rgmii"; + clocks = <&gcc GCC_ETH_AXI_CLK>, + <&gcc GCC_ETH_SLAVE_AHB_CLK>, + <&gcc GCC_ETH_PTP_CLK>, + <&gcc GCC_ETH_RGMII_CLK>; + interrupts = , + ; + interrupt-names = "macirq", "eth_lpi"; + snps,reset-gpio = <&tlmm 60 GPIO_ACTIVE_LOW>; + snps,reset-active-low; + + snps,txpbl = <8>; + snps,rxpbl = <2>; + snps,aal; + snps,tso; + + phy-handle = <&phy1>; + phy-mode = "rgmii"; + + mdio { + #address-cells = <0x1>; + #size-cells = <0x0>; + compatible = "snps,dwmac-mdio"; + phy1: phy@4 { + device_type = "ethernet-phy"; + reg = <0x4>; + }; + }; + +}; diff --git a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt index c5d0e7998e2b..8e7f8551d190 100644 --- a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt +++ b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt @@ -17,6 +17,8 @@ Clock Properties: - fsl,tmr-fiper1 Fixed interval period pulse generator. - fsl,tmr-fiper2 Fixed interval period pulse generator. - fsl,max-adj Maximum frequency adjustment in parts per billion. + - fsl,extts-fifo The presence of this property indicates hardware + support for the external trigger stamp FIFO. These properties set the operational parameters for the PTP clock. You must choose these carefully for the clock to work right. diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,glink.txt b/Documentation/devicetree/bindings/soc/qcom/qcom,glink.txt index 0b8cc533ca83..cf759e5f9b10 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,glink.txt +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,glink.txt @@ -55,7 +55,7 @@ of these nodes are defined by the individual bindings for the specific function = EXAMPLE The following example represents the GLINK RPM node on a MSM8996 device, with the function for the "rpm_request" channel defined, which is used for -regualtors and root clocks. +regulators and root clocks. apcs_glb: mailbox@9820000 { compatible = "qcom,msm8996-apcs-hmss-global"; diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,smp2p.txt b/Documentation/devicetree/bindings/soc/qcom/qcom,smp2p.txt index a35af2dafdad..49e1d72d3648 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,smp2p.txt +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,smp2p.txt @@ -41,12 +41,12 @@ processor ID) and a string identifier. - qcom,local-pid: Usage: required Value type: - Definition: specifies the identfier of the local endpoint of this edge + Definition: specifies the identifier of the local endpoint of this edge - qcom,remote-pid: Usage: required Value type: - Definition: specifies the identfier of the remote endpoint of this edge + Definition: specifies the identifier of the remote endpoint of this edge = SUBNODES Each SMP2P pair contain a set of inbound and outbound entries, these are diff --git a/Documentation/fb/fbcon.txt b/Documentation/fb/fbcon.txt index 62af30511a95..60a5ec04e8f0 100644 --- a/Documentation/fb/fbcon.txt +++ b/Documentation/fb/fbcon.txt @@ -163,6 +163,14 @@ C. Boot options be preserved until there actually is some text is output to the console. This option causes fbcon to bind immediately to the fbdev device. +7. fbcon=logo-pos: + + The only possible 'location' is 'center' (without quotes), and when + given, the bootup logo is moved from the default top-left corner + location to the center of the framebuffer. If more than one logo is + displayed due to multiple CPUs, the collected line of logos is moved + as a whole. + C. Attaching, Detaching and Unloading Before going on to how to attach, detach and unload the framebuffer console, an diff --git a/Documentation/networking/devlink-health.txt b/Documentation/networking/devlink-health.txt deleted file mode 100644 index 1db3fbea0831..000000000000 --- a/Documentation/networking/devlink-health.txt +++ /dev/null @@ -1,86 +0,0 @@ -The health mechanism is targeted for Real Time Alerting, in order to know when -something bad had happened to a PCI device -- Provide alert debug information -- Self healing -- If problem needs vendor support, provide a way to gather all needed debugging - information. - -The main idea is to unify and centralize driver health reports in the -generic devlink instance and allow the user to set different -attributes of the health reporting and recovery procedures. - -The devlink health reporter: -Device driver creates a "health reporter" per each error/health type. -Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error) -or unknown (driver specific). -For each registered health reporter a driver can issue error/health reports -asynchronously. All health reports handling is done by devlink. -Device driver can provide specific callbacks for each "health reporter", e.g. - - Recovery procedures - - Diagnostics and object dump procedures - - OOB initial parameters -Different parts of the driver can register different types of health reporters -with different handlers. - -Once an error is reported, devlink health will do the following actions: - * A log is being send to the kernel trace events buffer - * Health status and statistics are being updated for the reporter instance - * Object dump is being taken and saved at the reporter instance (as long as - there is no other dump which is already stored) - * Auto recovery attempt is being done. Depends on: - - Auto-recovery configuration - - Grace period vs. time passed since last recover - -The user interface: -User can access/change each reporter's parameters and driver specific callbacks -via devlink, e.g per error type (per health reporter) - - Configure reporter's generic parameters (like: disable/enable auto recovery) - - Invoke recovery procedure - - Run diagnostics - - Object dump - -The devlink health interface (via netlink): -DEVLINK_CMD_HEALTH_REPORTER_GET - Retrieves status and configuration info per DEV and reporter. -DEVLINK_CMD_HEALTH_REPORTER_SET - Allows reporter-related configuration setting. -DEVLINK_CMD_HEALTH_REPORTER_RECOVER - Triggers a reporter's recovery procedure. -DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE - Retrieves diagnostics data from a reporter on a device. -DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET - Retrieves the last stored dump. Devlink health - saves a single dump. If an dump is not already stored by the devlink - for this reporter, devlink generates a new dump. - dump output is defined by the reporter. -DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR - Clears the last saved dump file for the specified reporter. - - - netlink - +--------------------------+ - | | - | + | - | | | - +--------------------------+ - |request for ops - |(diagnose, - mlx5_core devlink |recover, - |dump) -+--------+ +--------------------------+ -| | | reporter| | -| | | +---------v----------+ | -| | ops execution | | | | -| <----------------------------------+ | | -| | | | | | -| | | + ^------------------+ | -| | | | request for ops | -| | | | (recover, dump) | -| | | | | -| | | +-+------------------+ | -| | health report | | health handler | | -| +-------------------------------> | | -| | | +--------------------+ | -| | health reporter create | | -| +----------------------------> | -+--------+ +--------------------------+ diff --git a/Documentation/networking/devlink-params-mlxsw.txt b/Documentation/networking/devlink-params-mlxsw.txt new file mode 100644 index 000000000000..2c5c67a920c9 --- /dev/null +++ b/Documentation/networking/devlink-params-mlxsw.txt @@ -0,0 +1,2 @@ +fw_load_policy [DEVICE, GENERIC] + Configuration mode: driverinit diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 2196b824e96c..01603bc2eff1 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -865,7 +865,7 @@ Three LSB bits store instruction class which is one of: BPF_STX 0x03 BPF_STX 0x03 BPF_ALU 0x04 BPF_ALU 0x04 BPF_JMP 0x05 BPF_JMP 0x05 - BPF_RET 0x06 [ class 6 unused, for future if needed ] + BPF_RET 0x06 BPF_JMP32 0x06 BPF_MISC 0x07 BPF_ALU64 0x07 When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ... @@ -902,9 +902,9 @@ If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of: BPF_ARSH 0xc0 /* eBPF only: sign extending shift right */ BPF_END 0xd0 /* eBPF only: endianness conversion */ -If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of: +If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of: - BPF_JA 0x00 + BPF_JA 0x00 /* BPF_JMP only */ BPF_JEQ 0x10 BPF_JGT 0x20 BPF_JGE 0x30 @@ -912,8 +912,8 @@ If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of: BPF_JNE 0x50 /* eBPF only: jump != */ BPF_JSGT 0x60 /* eBPF only: signed '>' */ BPF_JSGE 0x70 /* eBPF only: signed '>=' */ - BPF_CALL 0x80 /* eBPF only: function call */ - BPF_EXIT 0x90 /* eBPF only: function return */ + BPF_CALL 0x80 /* eBPF BPF_JMP only: function call */ + BPF_EXIT 0x90 /* eBPF BPF_JMP only: function return */ BPF_JLT 0xa0 /* eBPF only: unsigned '<' */ BPF_JLE 0xb0 /* eBPF only: unsigned '<=' */ BPF_JSLT 0xc0 /* eBPF only: signed '<' */ @@ -936,8 +936,9 @@ Classic BPF wastes the whole BPF_RET class to represent a single 'ret' operation. Classic BPF_RET | BPF_K means copy imm32 into return register and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT in eBPF means function exit only. The eBPF program needs to store return -value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is currently -unused and reserved for future use. +value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as +BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide +operands for the comparisons instead. For load and store instructions the 8-bit 'code' field is divided as: diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 6a47629ef8ed..f1627ca2a0ea 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -11,24 +11,25 @@ Contents: batman-adv can can_ucan_protocol - dpaa2/index - e100 - e1000 - e1000e - fm10k - igb - igbvf - ixgb - ixgbe - ixgbevf - i40e - iavf - ice + device_drivers/freescale/dpaa2/index + device_drivers/intel/e100 + device_drivers/intel/e1000 + device_drivers/intel/e1000e + device_drivers/intel/fm10k + device_drivers/intel/igb + device_drivers/intel/igbvf + device_drivers/intel/ixgb + device_drivers/intel/ixgbe + device_drivers/intel/ixgbevf + device_drivers/intel/i40e + device_drivers/intel/iavf + device_drivers/intel/ice kapi z8530book msg_zerocopy failover net_failover + phy alias bridge snmp_counter diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst new file mode 100644 index 000000000000..0dd90d7df5ec --- /dev/null +++ b/Documentation/networking/phy.rst @@ -0,0 +1,447 @@ +===================== +PHY Abstraction Layer +===================== + +Purpose +======= + +Most network devices consist of set of registers which provide an interface +to a MAC layer, which communicates with the physical connection through a +PHY. The PHY concerns itself with negotiating link parameters with the link +partner on the other side of the network connection (typically, an ethernet +cable), and provides a register interface to allow drivers to determine what +settings were chosen, and to configure what settings are allowed. + +While these devices are distinct from the network devices, and conform to a +standard layout for the registers, it has been common practice to integrate +the PHY management code with the network driver. This has resulted in large +amounts of redundant code. Also, on embedded systems with multiple (and +sometimes quite different) ethernet controllers connected to the same +management bus, it is difficult to ensure safe use of the bus. + +Since the PHYs are devices, and the management busses through which they are +accessed are, in fact, busses, the PHY Abstraction Layer treats them as such. +In doing so, it has these goals: + +#. Increase code-reuse +#. Increase overall code-maintainability +#. Speed development time for new network drivers, and for new systems + +Basically, this layer is meant to provide an interface to PHY devices which +allows network driver writers to write as little code as possible, while +still providing a full feature set. + +The MDIO bus +============ + +Most network devices are connected to a PHY by means of a management bus. +Different devices use different busses (though some share common interfaces). +In order to take advantage of the PAL, each bus interface needs to be +registered as a distinct device. + +#. read and write functions must be implemented. Their prototypes are:: + + int write(struct mii_bus *bus, int mii_id, int regnum, u16 value); + int read(struct mii_bus *bus, int mii_id, int regnum); + + mii_id is the address on the bus for the PHY, and regnum is the register + number. These functions are guaranteed not to be called from interrupt + time, so it is safe for them to block, waiting for an interrupt to signal + the operation is complete + +#. A reset function is optional. This is used to return the bus to an + initialized state. + +#. A probe function is needed. This function should set up anything the bus + driver needs, setup the mii_bus structure, and register with the PAL using + mdiobus_register. Similarly, there's a remove function to undo all of + that (use mdiobus_unregister). + +#. Like any driver, the device_driver structure must be configured, and init + exit functions are used to register the driver. + +#. The bus must also be declared somewhere as a device, and registered. + +As an example for how one driver implemented an mdio bus driver, see +drivers/net/ethernet/freescale/fsl_pq_mdio.c and an associated DTS file +for one of the users. (e.g. "git grep fsl,.*-mdio arch/powerpc/boot/dts/") + +(RG)MII/electrical interface considerations +=========================================== + +The Reduced Gigabit Medium Independent Interface (RGMII) is a 12-pin +electrical signal interface using a synchronous 125Mhz clock signal and several +data lines. Due to this design decision, a 1.5ns to 2ns delay must be added +between the clock line (RXC or TXC) and the data lines to let the PHY (clock +sink) have enough setup and hold times to sample the data lines correctly. The +PHY library offers different types of PHY_INTERFACE_MODE_RGMII* values to let +the PHY driver and optionally the MAC driver, implement the required delay. The +values of phy_interface_t must be understood from the perspective of the PHY +device itself, leading to the following: + +* PHY_INTERFACE_MODE_RGMII: the PHY is not responsible for inserting any + internal delay by itself, it assumes that either the Ethernet MAC (if capable + or the PCB traces) insert the correct 1.5-2ns delay + +* PHY_INTERFACE_MODE_RGMII_TXID: the PHY should insert an internal delay + for the transmit data lines (TXD[3:0]) processed by the PHY device + +* PHY_INTERFACE_MODE_RGMII_RXID: the PHY should insert an internal delay + for the receive data lines (RXD[3:0]) processed by the PHY device + +* PHY_INTERFACE_MODE_RGMII_ID: the PHY should insert internal delays for + both transmit AND receive data lines from/to the PHY device + +Whenever possible, use the PHY side RGMII delay for these reasons: + +* PHY devices may offer sub-nanosecond granularity in how they allow a + receiver/transmitter side delay (e.g: 0.5, 1.0, 1.5ns) to be specified. Such + precision may be required to account for differences in PCB trace lengths + +* PHY devices are typically qualified for a large range of applications + (industrial, medical, automotive...), and they provide a constant and + reliable delay across temperature/pressure/voltage ranges + +* PHY device drivers in PHYLIB being reusable by nature, being able to + configure correctly a specified delay enables more designs with similar delay + requirements to be operate correctly + +For cases where the PHY is not capable of providing this delay, but the +Ethernet MAC driver is capable of doing so, the correct phy_interface_t value +should be PHY_INTERFACE_MODE_RGMII, and the Ethernet MAC driver should be +configured correctly in order to provide the required transmit and/or receive +side delay from the perspective of the PHY device. Conversely, if the Ethernet +MAC driver looks at the phy_interface_t value, for any other mode but +PHY_INTERFACE_MODE_RGMII, it should make sure that the MAC-level delays are +disabled. + +In case neither the Ethernet MAC, nor the PHY are capable of providing the +required delays, as defined per the RGMII standard, several options may be +available: + +* Some SoCs may offer a pin pad/mux/controller capable of configuring a given + set of pins'strength, delays, and voltage; and it may be a suitable + option to insert the expected 2ns RGMII delay. + +* Modifying the PCB design to include a fixed delay (e.g: using a specifically + designed serpentine), which may not require software configuration at all. + +Common problems with RGMII delay mismatch +----------------------------------------- + +When there is a RGMII delay mismatch between the Ethernet MAC and the PHY, this +will most likely result in the clock and data line signals to be unstable when +the PHY or MAC take a snapshot of these signals to translate them into logical +1 or 0 states and reconstruct the data being transmitted/received. Typical +symptoms include: + +* Transmission/reception partially works, and there is frequent or occasional + packet loss observed + +* Ethernet MAC may report some or all packets ingressing with a FCS/CRC error, + or just discard them all + +* Switching to lower speeds such as 10/100Mbits/sec makes the problem go away + (since there is enough setup/hold time in that case) + +Connecting to a PHY +=================== + +Sometime during startup, the network driver needs to establish a connection +between the PHY device, and the network device. At this time, the PHY's bus +and drivers need to all have been loaded, so it is ready for the connection. +At this point, there are several ways to connect to the PHY: + +#. The PAL handles everything, and only calls the network driver when + the link state changes, so it can react. + +#. The PAL handles everything except interrupts (usually because the + controller has the interrupt registers). + +#. The PAL handles everything, but checks in with the driver every second, + allowing the network driver to react first to any changes before the PAL + does. + +#. The PAL serves only as a library of functions, with the network device + manually calling functions to update status, and configure the PHY + + +Letting the PHY Abstraction Layer do Everything +=============================================== + +If you choose option 1 (The hope is that every driver can, but to still be +useful to drivers that can't), connecting to the PHY is simple: + +First, you need a function to react to changes in the link state. This +function follows this protocol:: + + static void adjust_link(struct net_device *dev); + +Next, you need to know the device name of the PHY connected to this device. +The name will look something like, "0:00", where the first number is the +bus id, and the second is the PHY's address on that bus. Typically, +the bus is responsible for making its ID unique. + +Now, to connect, just call this function:: + + phydev = phy_connect(dev, phy_name, &adjust_link, interface); + +*phydev* is a pointer to the phy_device structure which represents the PHY. +If phy_connect is successful, it will return the pointer. dev, here, is the +pointer to your net_device. Once done, this function will have started the +PHY's software state machine, and registered for the PHY's interrupt, if it +has one. The phydev structure will be populated with information about the +current state, though the PHY will not yet be truly operational at this +point. + +PHY-specific flags should be set in phydev->dev_flags prior to the call +to phy_connect() such that the underlying PHY driver can check for flags +and perform specific operations based on them. +This is useful if the system has put hardware restrictions on +the PHY/controller, of which the PHY needs to be aware. + +*interface* is a u32 which specifies the connection type used +between the controller and the PHY. Examples are GMII, MII, +RGMII, and SGMII. For a full list, see include/linux/phy.h + +Now just make sure that phydev->supported and phydev->advertising have any +values pruned from them which don't make sense for your controller (a 10/100 +controller may be connected to a gigabit capable PHY, so you would need to +mask off SUPPORTED_1000baseT*). See include/linux/ethtool.h for definitions +for these bitfields. Note that you should not SET any bits, except the +SUPPORTED_Pause and SUPPORTED_AsymPause bits (see below), or the PHY may get +put into an unsupported state. + +Lastly, once the controller is ready to handle network traffic, you call +phy_start(phydev). This tells the PAL that you are ready, and configures the +PHY to connect to the network. If the MAC interrupt of your network driver +also handles PHY status changes, just set phydev->irq to PHY_IGNORE_INTERRUPT +before you call phy_start and use phy_mac_interrupt() from the network +driver. If you don't want to use interrupts, set phydev->irq to PHY_POLL. +phy_start() enables the PHY interrupts (if applicable) and starts the +phylib state machine. + +When you want to disconnect from the network (even if just briefly), you call +phy_stop(phydev). This function also stops the phylib state machine and +disables PHY interrupts. + +Pause frames / flow control +=========================== + +The PHY does not participate directly in flow control/pause frames except by +making sure that the SUPPORTED_Pause and SUPPORTED_AsymPause bits are set in +MII_ADVERTISE to indicate towards the link partner that the Ethernet MAC +controller supports such a thing. Since flow control/pause frames generation +involves the Ethernet MAC driver, it is recommended that this driver takes care +of properly indicating advertisement and support for such features by setting +the SUPPORTED_Pause and SUPPORTED_AsymPause bits accordingly. This can be done +either before or after phy_connect() and/or as a result of implementing the +ethtool::set_pauseparam feature. + + +Keeping Close Tabs on the PAL +============================= + +It is possible that the PAL's built-in state machine needs a little help to +keep your network device and the PHY properly in sync. If so, you can +register a helper function when connecting to the PHY, which will be called +every second before the state machine reacts to any changes. To do this, you +need to manually call phy_attach() and phy_prepare_link(), and then call +phy_start_machine() with the second argument set to point to your special +handler. + +Currently there are no examples of how to use this functionality, and testing +on it has been limited because the author does not have any drivers which use +it (they all use option 1). So Caveat Emptor. + +Doing it all yourself +===================== + +There's a remote chance that the PAL's built-in state machine cannot track +the complex interactions between the PHY and your network device. If this is +so, you can simply call phy_attach(), and not call phy_start_machine or +phy_prepare_link(). This will mean that phydev->state is entirely yours to +handle (phy_start and phy_stop toggle between some of the states, so you +might need to avoid them). + +An effort has been made to make sure that useful functionality can be +accessed without the state-machine running, and most of these functions are +descended from functions which did not interact with a complex state-machine. +However, again, no effort has been made so far to test running without the +state machine, so tryer beware. + +Here is a brief rundown of the functions:: + + int phy_read(struct phy_device *phydev, u16 regnum); + int phy_write(struct phy_device *phydev, u16 regnum, u16 val); + +Simple read/write primitives. They invoke the bus's read/write function +pointers. +:: + + void phy_print_status(struct phy_device *phydev); + +A convenience function to print out the PHY status neatly. +:: + + void phy_request_interrupt(struct phy_device *phydev); + +Requests the IRQ for the PHY interrupts. +:: + + struct phy_device * phy_attach(struct net_device *dev, const char *phy_id, + phy_interface_t interface); + +Attaches a network device to a particular PHY, binding the PHY to a generic +driver if none was found during bus initialization. +:: + + int phy_start_aneg(struct phy_device *phydev); + +Using variables inside the phydev structure, either configures advertising +and resets autonegotiation, or disables autonegotiation, and configures +forced settings. +:: + + static inline int phy_read_status(struct phy_device *phydev); + +Fills the phydev structure with up-to-date information about the current +settings in the PHY. +:: + + int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); + +Ethtool convenience functions. +:: + + int phy_mii_ioctl(struct phy_device *phydev, + struct mii_ioctl_data *mii_data, int cmd); + +The MII ioctl. Note that this function will completely screw up the state +machine if you write registers like BMCR, BMSR, ADVERTISE, etc. Best to +use this only to write registers which are not standard, and don't set off +a renegotiation. + +PHY Device Drivers +================== + +With the PHY Abstraction Layer, adding support for new PHYs is +quite easy. In some cases, no work is required at all! However, +many PHYs require a little hand-holding to get up-and-running. + +Generic PHY driver +------------------ + +If the desired PHY doesn't have any errata, quirks, or special +features you want to support, then it may be best to not add +support, and let the PHY Abstraction Layer's Generic PHY Driver +do all of the work. + +Writing a PHY driver +-------------------- + +If you do need to write a PHY driver, the first thing to do is +make sure it can be matched with an appropriate PHY device. +This is done during bus initialization by reading the device's +UID (stored in registers 2 and 3), then comparing it to each +driver's phy_id field by ANDing it with each driver's +phy_id_mask field. Also, it needs a name. Here's an example:: + + static struct phy_driver dm9161_driver = { + .phy_id = 0x0181b880, + .name = "Davicom DM9161E", + .phy_id_mask = 0x0ffffff0, + ... + } + +Next, you need to specify what features (speed, duplex, autoneg, +etc) your PHY device and driver support. Most PHYs support +PHY_BASIC_FEATURES, but you can look in include/mii.h for other +features. + +Each driver consists of a number of function pointers, documented +in include/linux/phy.h under the phy_driver structure. + +Of these, only config_aneg and read_status are required to be +assigned by the driver code. The rest are optional. Also, it is +preferred to use the generic phy driver's versions of these two +functions if at all possible: genphy_read_status and +genphy_config_aneg. If this is not possible, it is likely that +you only need to perform some actions before and after invoking +these functions, and so your functions will wrap the generic +ones. + +Feel free to look at the Marvell, Cicada, and Davicom drivers in +drivers/net/phy/ for examples (the lxt and qsemi drivers have +not been tested as of this writing). + +The PHY's MMD register accesses are handled by the PAL framework +by default, but can be overridden by a specific PHY driver if +required. This could be the case if a PHY was released for +manufacturing before the MMD PHY register definitions were +standardized by the IEEE. Most modern PHYs will be able to use +the generic PAL framework for accessing the PHY's MMD registers. +An example of such usage is for Energy Efficient Ethernet support, +implemented in the PAL. This support uses the PAL to access MMD +registers for EEE query and configuration if the PHY supports +the IEEE standard access mechanisms, or can use the PHY's specific +access interfaces if overridden by the specific PHY driver. See +the Micrel driver in drivers/net/phy/ for an example of how this +can be implemented. + +Board Fixups +============ + +Sometimes the specific interaction between the platform and the PHY requires +special handling. For instance, to change where the PHY's clock input is, +or to add a delay to account for latency issues in the data path. In order +to support such contingencies, the PHY Layer allows platform code to register +fixups to be run when the PHY is brought up (or subsequently reset). + +When the PHY Layer brings up a PHY it checks to see if there are any fixups +registered for it, matching based on UID (contained in the PHY device's phy_id +field) and the bus identifier (contained in phydev->dev.bus_id). Both must +match, however two constants, PHY_ANY_ID and PHY_ANY_UID, are provided as +wildcards for the bus ID and UID, respectively. + +When a match is found, the PHY layer will invoke the run function associated +with the fixup. This function is passed a pointer to the phy_device of +interest. It should therefore only operate on that PHY. + +The platform code can either register the fixup using phy_register_fixup():: + + int phy_register_fixup(const char *phy_id, + u32 phy_uid, u32 phy_uid_mask, + int (*run)(struct phy_device *)); + +Or using one of the two stubs, phy_register_fixup_for_uid() and +phy_register_fixup_for_id():: + + int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, + int (*run)(struct phy_device *)); + int phy_register_fixup_for_id(const char *phy_id, + int (*run)(struct phy_device *)); + +The stubs set one of the two matching criteria, and set the other one to +match anything. + +When phy_register_fixup() or \*_for_uid()/\*_for_id() is called at module, +unregister fixup and free allocate memory are required. + +Call one of following function before unloading module:: + + int phy_unregister_fixup(const char *phy_id, u32 phy_uid, u32 phy_uid_mask); + int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask); + int phy_register_fixup_for_id(const char *phy_id); + +Standards +========= + +IEEE Standard 802.3: CSMA/CD Access Method and Physical Layer Specifications, Section Two: +http://standards.ieee.org/getieee802/download/802.3-2008_section2.pdf + +RGMII v1.3: +http://web.archive.org/web/20160303212629/http://www.hp.com/rnd/pdfs/RGMIIv1_3.pdf + +RGMII v2.0: +http://web.archive.org/web/20160303171328/http://www.hp.com/rnd/pdfs/RGMIIv2_0_final_hp.pdf diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt deleted file mode 100644 index bdec0f700bc1..000000000000 --- a/Documentation/networking/phy.txt +++ /dev/null @@ -1,427 +0,0 @@ - -------- -PHY Abstraction Layer -(Updated 2008-04-08) - -Purpose - - Most network devices consist of set of registers which provide an interface - to a MAC layer, which communicates with the physical connection through a - PHY. The PHY concerns itself with negotiating link parameters with the link - partner on the other side of the network connection (typically, an ethernet - cable), and provides a register interface to allow drivers to determine what - settings were chosen, and to configure what settings are allowed. - - While these devices are distinct from the network devices, and conform to a - standard layout for the registers, it has been common practice to integrate - the PHY management code with the network driver. This has resulted in large - amounts of redundant code. Also, on embedded systems with multiple (and - sometimes quite different) ethernet controllers connected to the same - management bus, it is difficult to ensure safe use of the bus. - - Since the PHYs are devices, and the management busses through which they are - accessed are, in fact, busses, the PHY Abstraction Layer treats them as such. - In doing so, it has these goals: - - 1) Increase code-reuse - 2) Increase overall code-maintainability - 3) Speed development time for new network drivers, and for new systems - - Basically, this layer is meant to provide an interface to PHY devices which - allows network driver writers to write as little code as possible, while - still providing a full feature set. - -The MDIO bus - - Most network devices are connected to a PHY by means of a management bus. - Different devices use different busses (though some share common interfaces). - In order to take advantage of the PAL, each bus interface needs to be - registered as a distinct device. - - 1) read and write functions must be implemented. Their prototypes are: - - int write(struct mii_bus *bus, int mii_id, int regnum, u16 value); - int read(struct mii_bus *bus, int mii_id, int regnum); - - mii_id is the address on the bus for the PHY, and regnum is the register - number. These functions are guaranteed not to be called from interrupt - time, so it is safe for them to block, waiting for an interrupt to signal - the operation is complete - - 2) A reset function is optional. This is used to return the bus to an - initialized state. - - 3) A probe function is needed. This function should set up anything the bus - driver needs, setup the mii_bus structure, and register with the PAL using - mdiobus_register. Similarly, there's a remove function to undo all of - that (use mdiobus_unregister). - - 4) Like any driver, the device_driver structure must be configured, and init - exit functions are used to register the driver. - - 5) The bus must also be declared somewhere as a device, and registered. - - As an example for how one driver implemented an mdio bus driver, see - drivers/net/ethernet/freescale/fsl_pq_mdio.c and an associated DTS file - for one of the users. (e.g. "git grep fsl,.*-mdio arch/powerpc/boot/dts/") - -(RG)MII/electrical interface considerations - - The Reduced Gigabit Medium Independent Interface (RGMII) is a 12-pin - electrical signal interface using a synchronous 125Mhz clock signal and several - data lines. Due to this design decision, a 1.5ns to 2ns delay must be added - between the clock line (RXC or TXC) and the data lines to let the PHY (clock - sink) have enough setup and hold times to sample the data lines correctly. The - PHY library offers different types of PHY_INTERFACE_MODE_RGMII* values to let - the PHY driver and optionally the MAC driver, implement the required delay. The - values of phy_interface_t must be understood from the perspective of the PHY - device itself, leading to the following: - - * PHY_INTERFACE_MODE_RGMII: the PHY is not responsible for inserting any - internal delay by itself, it assumes that either the Ethernet MAC (if capable - or the PCB traces) insert the correct 1.5-2ns delay - - * PHY_INTERFACE_MODE_RGMII_TXID: the PHY should insert an internal delay - for the transmit data lines (TXD[3:0]) processed by the PHY device - - * PHY_INTERFACE_MODE_RGMII_RXID: the PHY should insert an internal delay - for the receive data lines (RXD[3:0]) processed by the PHY device - - * PHY_INTERFACE_MODE_RGMII_ID: the PHY should insert internal delays for - both transmit AND receive data lines from/to the PHY device - - Whenever possible, use the PHY side RGMII delay for these reasons: - - * PHY devices may offer sub-nanosecond granularity in how they allow a - receiver/transmitter side delay (e.g: 0.5, 1.0, 1.5ns) to be specified. Such - precision may be required to account for differences in PCB trace lengths - - * PHY devices are typically qualified for a large range of applications - (industrial, medical, automotive...), and they provide a constant and - reliable delay across temperature/pressure/voltage ranges - - * PHY device drivers in PHYLIB being reusable by nature, being able to - configure correctly a specified delay enables more designs with similar delay - requirements to be operate correctly - - For cases where the PHY is not capable of providing this delay, but the - Ethernet MAC driver is capable of doing so, the correct phy_interface_t value - should be PHY_INTERFACE_MODE_RGMII, and the Ethernet MAC driver should be - configured correctly in order to provide the required transmit and/or receive - side delay from the perspective of the PHY device. Conversely, if the Ethernet - MAC driver looks at the phy_interface_t value, for any other mode but - PHY_INTERFACE_MODE_RGMII, it should make sure that the MAC-level delays are - disabled. - - In case neither the Ethernet MAC, nor the PHY are capable of providing the - required delays, as defined per the RGMII standard, several options may be - available: - - * Some SoCs may offer a pin pad/mux/controller capable of configuring a given - set of pins'strength, delays, and voltage; and it may be a suitable - option to insert the expected 2ns RGMII delay. - - * Modifying the PCB design to include a fixed delay (e.g: using a specifically - designed serpentine), which may not require software configuration at all. - -Common problems with RGMII delay mismatch - - When there is a RGMII delay mismatch between the Ethernet MAC and the PHY, this - will most likely result in the clock and data line signals to be unstable when - the PHY or MAC take a snapshot of these signals to translate them into logical - 1 or 0 states and reconstruct the data being transmitted/received. Typical - symptoms include: - - * Transmission/reception partially works, and there is frequent or occasional - packet loss observed - - * Ethernet MAC may report some or all packets ingressing with a FCS/CRC error, - or just discard them all - - * Switching to lower speeds such as 10/100Mbits/sec makes the problem go away - (since there is enough setup/hold time in that case) - - -Connecting to a PHY - - Sometime during startup, the network driver needs to establish a connection - between the PHY device, and the network device. At this time, the PHY's bus - and drivers need to all have been loaded, so it is ready for the connection. - At this point, there are several ways to connect to the PHY: - - 1) The PAL handles everything, and only calls the network driver when - the link state changes, so it can react. - - 2) The PAL handles everything except interrupts (usually because the - controller has the interrupt registers). - - 3) The PAL handles everything, but checks in with the driver every second, - allowing the network driver to react first to any changes before the PAL - does. - - 4) The PAL serves only as a library of functions, with the network device - manually calling functions to update status, and configure the PHY - - -Letting the PHY Abstraction Layer do Everything - - If you choose option 1 (The hope is that every driver can, but to still be - useful to drivers that can't), connecting to the PHY is simple: - - First, you need a function to react to changes in the link state. This - function follows this protocol: - - static void adjust_link(struct net_device *dev); - - Next, you need to know the device name of the PHY connected to this device. - The name will look something like, "0:00", where the first number is the - bus id, and the second is the PHY's address on that bus. Typically, - the bus is responsible for making its ID unique. - - Now, to connect, just call this function: - - phydev = phy_connect(dev, phy_name, &adjust_link, interface); - - phydev is a pointer to the phy_device structure which represents the PHY. If - phy_connect is successful, it will return the pointer. dev, here, is the - pointer to your net_device. Once done, this function will have started the - PHY's software state machine, and registered for the PHY's interrupt, if it - has one. The phydev structure will be populated with information about the - current state, though the PHY will not yet be truly operational at this - point. - - PHY-specific flags should be set in phydev->dev_flags prior to the call - to phy_connect() such that the underlying PHY driver can check for flags - and perform specific operations based on them. - This is useful if the system has put hardware restrictions on - the PHY/controller, of which the PHY needs to be aware. - - interface is a u32 which specifies the connection type used - between the controller and the PHY. Examples are GMII, MII, - RGMII, and SGMII. For a full list, see include/linux/phy.h - - Now just make sure that phydev->supported and phydev->advertising have any - values pruned from them which don't make sense for your controller (a 10/100 - controller may be connected to a gigabit capable PHY, so you would need to - mask off SUPPORTED_1000baseT*). See include/linux/ethtool.h for definitions - for these bitfields. Note that you should not SET any bits, except the - SUPPORTED_Pause and SUPPORTED_AsymPause bits (see below), or the PHY may get - put into an unsupported state. - - Lastly, once the controller is ready to handle network traffic, you call - phy_start(phydev). This tells the PAL that you are ready, and configures the - PHY to connect to the network. If you want to handle your own interrupts, - just set phydev->irq to PHY_IGNORE_INTERRUPT before you call phy_start. - Similarly, if you don't want to use interrupts, set phydev->irq to PHY_POLL. - - When you want to disconnect from the network (even if just briefly), you call - phy_stop(phydev). - -Pause frames / flow control - - The PHY does not participate directly in flow control/pause frames except by - making sure that the SUPPORTED_Pause and SUPPORTED_AsymPause bits are set in - MII_ADVERTISE to indicate towards the link partner that the Ethernet MAC - controller supports such a thing. Since flow control/pause frames generation - involves the Ethernet MAC driver, it is recommended that this driver takes care - of properly indicating advertisement and support for such features by setting - the SUPPORTED_Pause and SUPPORTED_AsymPause bits accordingly. This can be done - either before or after phy_connect() and/or as a result of implementing the - ethtool::set_pauseparam feature. - - -Keeping Close Tabs on the PAL - - It is possible that the PAL's built-in state machine needs a little help to - keep your network device and the PHY properly in sync. If so, you can - register a helper function when connecting to the PHY, which will be called - every second before the state machine reacts to any changes. To do this, you - need to manually call phy_attach() and phy_prepare_link(), and then call - phy_start_machine() with the second argument set to point to your special - handler. - - Currently there are no examples of how to use this functionality, and testing - on it has been limited because the author does not have any drivers which use - it (they all use option 1). So Caveat Emptor. - -Doing it all yourself - - There's a remote chance that the PAL's built-in state machine cannot track - the complex interactions between the PHY and your network device. If this is - so, you can simply call phy_attach(), and not call phy_start_machine or - phy_prepare_link(). This will mean that phydev->state is entirely yours to - handle (phy_start and phy_stop toggle between some of the states, so you - might need to avoid them). - - An effort has been made to make sure that useful functionality can be - accessed without the state-machine running, and most of these functions are - descended from functions which did not interact with a complex state-machine. - However, again, no effort has been made so far to test running without the - state machine, so tryer beware. - - Here is a brief rundown of the functions: - - int phy_read(struct phy_device *phydev, u16 regnum); - int phy_write(struct phy_device *phydev, u16 regnum, u16 val); - - Simple read/write primitives. They invoke the bus's read/write function - pointers. - - void phy_print_status(struct phy_device *phydev); - - A convenience function to print out the PHY status neatly. - - int phy_start_interrupts(struct phy_device *phydev); - int phy_stop_interrupts(struct phy_device *phydev); - - Requests the IRQ for the PHY interrupts, then enables them for - start, or disables then frees them for stop. - - struct phy_device * phy_attach(struct net_device *dev, const char *phy_id, - phy_interface_t interface); - - Attaches a network device to a particular PHY, binding the PHY to a generic - driver if none was found during bus initialization. - - int phy_start_aneg(struct phy_device *phydev); - - Using variables inside the phydev structure, either configures advertising - and resets autonegotiation, or disables autonegotiation, and configures - forced settings. - - static inline int phy_read_status(struct phy_device *phydev); - - Fills the phydev structure with up-to-date information about the current - settings in the PHY. - - int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); - - Ethtool convenience functions. - - int phy_mii_ioctl(struct phy_device *phydev, - struct mii_ioctl_data *mii_data, int cmd); - - The MII ioctl. Note that this function will completely screw up the state - machine if you write registers like BMCR, BMSR, ADVERTISE, etc. Best to - use this only to write registers which are not standard, and don't set off - a renegotiation. - - -PHY Device Drivers - - With the PHY Abstraction Layer, adding support for new PHYs is - quite easy. In some cases, no work is required at all! However, - many PHYs require a little hand-holding to get up-and-running. - -Generic PHY driver - - If the desired PHY doesn't have any errata, quirks, or special - features you want to support, then it may be best to not add - support, and let the PHY Abstraction Layer's Generic PHY Driver - do all of the work. - -Writing a PHY driver - - If you do need to write a PHY driver, the first thing to do is - make sure it can be matched with an appropriate PHY device. - This is done during bus initialization by reading the device's - UID (stored in registers 2 and 3), then comparing it to each - driver's phy_id field by ANDing it with each driver's - phy_id_mask field. Also, it needs a name. Here's an example: - - static struct phy_driver dm9161_driver = { - .phy_id = 0x0181b880, - .name = "Davicom DM9161E", - .phy_id_mask = 0x0ffffff0, - ... - } - - Next, you need to specify what features (speed, duplex, autoneg, - etc) your PHY device and driver support. Most PHYs support - PHY_BASIC_FEATURES, but you can look in include/mii.h for other - features. - - Each driver consists of a number of function pointers, documented - in include/linux/phy.h under the phy_driver structure. - - Of these, only config_aneg and read_status are required to be - assigned by the driver code. The rest are optional. Also, it is - preferred to use the generic phy driver's versions of these two - functions if at all possible: genphy_read_status and - genphy_config_aneg. If this is not possible, it is likely that - you only need to perform some actions before and after invoking - these functions, and so your functions will wrap the generic - ones. - - Feel free to look at the Marvell, Cicada, and Davicom drivers in - drivers/net/phy/ for examples (the lxt and qsemi drivers have - not been tested as of this writing). - - The PHY's MMD register accesses are handled by the PAL framework - by default, but can be overridden by a specific PHY driver if - required. This could be the case if a PHY was released for - manufacturing before the MMD PHY register definitions were - standardized by the IEEE. Most modern PHYs will be able to use - the generic PAL framework for accessing the PHY's MMD registers. - An example of such usage is for Energy Efficient Ethernet support, - implemented in the PAL. This support uses the PAL to access MMD - registers for EEE query and configuration if the PHY supports - the IEEE standard access mechanisms, or can use the PHY's specific - access interfaces if overridden by the specific PHY driver. See - the Micrel driver in drivers/net/phy/ for an example of how this - can be implemented. - -Board Fixups - - Sometimes the specific interaction between the platform and the PHY requires - special handling. For instance, to change where the PHY's clock input is, - or to add a delay to account for latency issues in the data path. In order - to support such contingencies, the PHY Layer allows platform code to register - fixups to be run when the PHY is brought up (or subsequently reset). - - When the PHY Layer brings up a PHY it checks to see if there are any fixups - registered for it, matching based on UID (contained in the PHY device's phy_id - field) and the bus identifier (contained in phydev->dev.bus_id). Both must - match, however two constants, PHY_ANY_ID and PHY_ANY_UID, are provided as - wildcards for the bus ID and UID, respectively. - - When a match is found, the PHY layer will invoke the run function associated - with the fixup. This function is passed a pointer to the phy_device of - interest. It should therefore only operate on that PHY. - - The platform code can either register the fixup using phy_register_fixup(): - - int phy_register_fixup(const char *phy_id, - u32 phy_uid, u32 phy_uid_mask, - int (*run)(struct phy_device *)); - - Or using one of the two stubs, phy_register_fixup_for_uid() and - phy_register_fixup_for_id(): - - int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, - int (*run)(struct phy_device *)); - int phy_register_fixup_for_id(const char *phy_id, - int (*run)(struct phy_device *)); - - The stubs set one of the two matching criteria, and set the other one to - match anything. - - When phy_register_fixup() or *_for_uid()/*_for_id() is called at module, - unregister fixup and free allocate memory are required. - - Call one of following function before unloading module. - - int phy_unregister_fixup(const char *phy_id, u32 phy_uid, u32 phy_uid_mask); - int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask); - int phy_register_fixup_for_id(const char *phy_id); - -Standards - - IEEE Standard 802.3: CSMA/CD Access Method and Physical Layer Specifications, Section Two: - http://standards.ieee.org/getieee802/download/802.3-2008_section2.pdf - - RGMII v1.3: - http://web.archive.org/web/20160303212629/http://www.hp.com/rnd/pdfs/RGMIIv1_3.pdf - - RGMII v2.0: - http://web.archive.org/web/20160303171328/http://www.hp.com/rnd/pdfs/RGMIIv2_0_final_hp.pdf diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index c9d052e0cf51..2df5894353d6 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -1000,51 +1000,6 @@ The kernel interface functions are as follows: size should be set when the call is begun. tx_total_len may not be less than zero. - (*) Check to see the completion state of a call so that the caller can assess - whether it needs to be retried. - - enum rxrpc_call_completion { - RXRPC_CALL_SUCCEEDED, - RXRPC_CALL_REMOTELY_ABORTED, - RXRPC_CALL_LOCALLY_ABORTED, - RXRPC_CALL_LOCAL_ERROR, - RXRPC_CALL_NETWORK_ERROR, - }; - - int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call, - enum rxrpc_call_completion *_compl, - u32 *_abort_code); - - On return, -EINPROGRESS will be returned if the call is still ongoing; if - it is finished, *_compl will be set to indicate the manner of completion, - *_abort_code will be set to any abort code that occurred. 0 will be - returned on a successful completion, -ECONNABORTED will be returned if the - client failed due to a remote abort and anything else will return an - appropriate error code. - - The caller should look at this information to decide if it's worth - retrying the call. - - (*) Retry a client call. - - int rxrpc_kernel_retry_call(struct socket *sock, - struct rxrpc_call *call, - struct sockaddr_rxrpc *srx, - struct key *key); - - This attempts to partially reinitialise a call and submit it again while - reusing the original call's Tx queue to avoid the need to repackage and - re-encrypt the data to be sent. call indicates the call to retry, srx the - new address to send it to and key the encryption key to use for signing or - encrypting the packets. - - For this to work, the first Tx data packet must still be in the transmit - queue, and currently this is only permitted for local and network errors - and the call must not have been aborted. Any partially constructed Tx - packet is left as is and can continue being filled afterwards. - - It returns 0 if the call was requeued and an error otherwise. - (*) Get call RTT. u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call); diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst index 486ab33acc3a..c5642f430d2e 100644 --- a/Documentation/networking/snmp_counter.rst +++ b/Documentation/networking/snmp_counter.rst @@ -366,6 +366,27 @@ to the accept queue. TCP Fast Open ============= +* TcpEstabResets +Defined in `RFC1213 tcpEstabResets`_. + +.. _RFC1213 tcpEstabResets: https://tools.ietf.org/html/rfc1213#page-48 + +* TcpAttemptFails +Defined in `RFC1213 tcpAttemptFails`_. + +.. _RFC1213 tcpAttemptFails: https://tools.ietf.org/html/rfc1213#page-48 + +* TcpOutRsts +Defined in `RFC1213 tcpOutRsts`_. The RFC says this counter indicates +the 'segments sent containing the RST flag', but in linux kernel, this +couner indicates the segments kerenl tried to send. The sending +process might be failed due to some errors (e.g. memory alloc failed). + +.. _RFC1213 tcpOutRsts: https://tools.ietf.org/html/rfc1213#page-52 + + +TCP Fast Path +============ When kernel receives a TCP packet, it has two paths to handler the packet, one is fast path, another is slow path. The comment in kernel code provides a good explanation of them, I pasted them below:: @@ -413,7 +434,6 @@ increase 1. TCP abort ========= - * TcpExtTCPAbortOnData It means TCP layer has data in flight, but need to close the @@ -589,7 +609,6 @@ packet yet, the sender would know packet 4 is out of order. The TCP stack of kernel will increase TcpExtTCPSACKReorder for both of the above scenarios. - DSACK ===== The DSACK is defined in `RFC2883`_. The receiver uses DSACK to report @@ -612,8 +631,7 @@ The TCP stack receives an out of order duplicate packet, so it sends a DSACK to the sender. * TcpExtTCPDSACKRecv - -The TCP stack receives a DSACK, which indicate an acknowledged +The TCP stack receives a DSACK, which indicates an acknowledged duplicate packet is received. * TcpExtTCPDSACKOfoRecv @@ -621,6 +639,56 @@ duplicate packet is received. The TCP stack receives a DSACK, which indicate an out of order duplicate packet is received. +invalid SACK and DSACK +==================== +When a SACK (or DSACK) block is invalid, a corresponding counter would +be updated. The validation method is base on the start/end sequence +number of the SACK block. For more details, please refer the comment +of the function tcp_is_sackblock_valid in the kernel source code. A +SACK option could have up to 4 blocks, they are checked +individually. E.g., if 3 blocks of a SACk is invalid, the +corresponding counter would be updated 3 times. The comment of the +`Add counters for discarded SACK blocks`_ patch has additional +explaination: + +.. _Add counters for discarded SACK blocks: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=18f02545a9a16c9a89778b91a162ad16d510bb32 + +* TcpExtTCPSACKDiscard +This counter indicates how many SACK blocks are invalid. If the invalid +SACK block is caused by ACK recording, the TCP stack will only ignore +it and won't update this counter. + +* TcpExtTCPDSACKIgnoredOld and TcpExtTCPDSACKIgnoredNoUndo +When a DSACK block is invalid, one of these two counters would be +updated. Which counter will be updated depends on the undo_marker flag +of the TCP socket. If the undo_marker is not set, the TCP stack isn't +likely to re-transmit any packets, and we still receive an invalid +DSACK block, the reason might be that the packet is duplicated in the +middle of the network. In such scenario, TcpExtTCPDSACKIgnoredNoUndo +will be updated. If the undo_marker is set, TcpExtTCPDSACKIgnoredOld +will be updated. As implied in its name, it might be an old packet. + +SACK shift +========= +The linux networking stack stores data in sk_buff struct (skb for +short). If a SACK block acrosses multiple skb, the TCP stack will try +to re-arrange data in these skb. E.g. if a SACK block acknowledges seq +10 to 15, skb1 has seq 10 to 13, skb2 has seq 14 to 20. The seq 14 and +15 in skb2 would be moved to skb1. This operation is 'shift'. If a +SACK block acknowledges seq 10 to 20, skb1 has seq 10 to 13, skb2 has +seq 14 to 20. All data in skb2 will be moved to skb1, and skb2 will be +discard, this operation is 'merge'. + +* TcpExtTCPSackShifted +A skb is shifted + +* TcpExtTCPSackMerged +A skb is merged + +* TcpExtTCPSackShiftFallback +A skb should be shifted or merged, but the TCP stack doesn't do it for +some reasons. + TCP out of order ================ * TcpExtTCPOFOQueue @@ -721,6 +789,60 @@ unacknowledged number (more strict than `RFC 5961 section 5.2`_). .. _RFC 5961 section 4.2: https://tools.ietf.org/html/rfc5961#page-9 .. _RFC 5961 section 5.2: https://tools.ietf.org/html/rfc5961#page-11 +TCP receive window +================= +* TcpExtTCPWantZeroWindowAdv +Depending on current memory usage, the TCP stack tries to set receive +window to zero. But the receive window might still be a no-zero +value. For example, if the previous window size is 10, and the TCP +stack receives 3 bytes, the current window size would be 7 even if the +window size calculated by the memory usage is zero. + +* TcpExtTCPToZeroWindowAdv +The TCP receive window is set to zero from a no-zero value. + +* TcpExtTCPFromZeroWindowAdv +The TCP receive window is set to no-zero value from zero. + + +Delayed ACK +========== +The TCP Delayed ACK is a technique which is used for reducing the +packet count in the network. For more details, please refer the +`Delayed ACK wiki`_ + +.. _Delayed ACK wiki: https://en.wikipedia.org/wiki/TCP_delayed_acknowledgment + +* TcpExtDelayedACKs +A delayed ACK timer expires. The TCP stack will send a pure ACK packet +and exit the delayed ACK mode. + +* TcpExtDelayedACKLocked +A delayed ACK timer expires, but the TCP stack can't send an ACK +immediately due to the socket is locked by a userspace program. The +TCP stack will send a pure ACK later (after the userspace program +unlock the socket). When the TCP stack sends the pure ACK later, the +TCP stack will also update TcpExtDelayedACKs and exit the delayed ACK +mode. + +* TcpExtDelayedACKLost +It will be updated when the TCP stack receives a packet which has been +ACKed. A Delayed ACK loss might cause this issue, but it would also be +triggered by other reasons, such as a packet is duplicated in the +network. + +Tail Loss Probe (TLP) +=================== +TLP is an algorithm which is used to detect TCP packet loss. For more +details, please refer the `TLP paper`_. + +.. _TLP paper: https://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01 + +* TcpExtTCPLossProbes +A TLP probe packet is sent. + +* TcpExtTCPLossProbeRecovery +A packet loss is detected and recovered by TLP. examples ======== diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 1be0b6f9e0cb..9d1432e0aaa8 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -417,7 +417,7 @@ is again deprecated and ts[2] holds a hardware timestamp if set. Hardware time stamping must also be initialized for each device driver that is expected to do hardware time stamping. The parameter is defined in -/include/linux/net_tstamp.h as: +include/uapi/linux/net_tstamp.h as: struct hwtstamp_config { int flags; /* no flags defined right now, must be zero */ @@ -487,7 +487,7 @@ enum { HWTSTAMP_FILTER_PTP_V1_L4_EVENT, /* for the complete list of values, please check - * the include file /include/linux/net_tstamp.h + * the include file include/uapi/linux/net_tstamp.h */ }; diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 2793d4eac55f..bc0680706870 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -291,6 +291,20 @@ user space is responsible for creating them if needed. Default : 0 (for compatibility reasons) +devconf_inherit_init_net +---------------------------- + +Controls if a new network namespace should inherit all current +settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By +default, we keep the current behavior: for IPv4 we inherit all current +settings from init_net and for IPv6 we reset all settings to default. + +If set to 1, both IPv4 and IPv6 settings are forced to inherit from +current ones in init_net. If set to 2, both IPv4 and IPv6 settings are +forced to reset to their default values. + +Default : 0 (for compatibility reasons) + 2. /proc/sys/net/unix - Parameters for Unix domain sockets ------------------------------------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index a592b9992b46..019a2bcfbd09 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3052,8 +3052,8 @@ F: include/linux/bcm963xx_nvram.h F: include/linux/bcm963xx_tag.h BROADCOM BNX2 GIGABIT ETHERNET DRIVER -M: Rasesh Mody -M: Dept-GELinuxNICDev@cavium.com +M: Rasesh Mody +M: GR-Linux-NIC-Dev@marvell.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/bnx2.* @@ -3072,9 +3072,9 @@ S: Supported F: drivers/scsi/bnx2i/ BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER -M: Ariel Elior -M: Sudarsana Kalluru -M: everest-linux-l2@cavium.com +M: Ariel Elior +M: Sudarsana Kalluru +M: GR-everest-linux-l2@marvell.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/bnx2x/ @@ -3249,9 +3249,9 @@ S: Supported F: drivers/scsi/bfa/ BROCADE BNA 10 GIGABIT ETHERNET DRIVER -M: Rasesh Mody -M: Sudarsana Kalluru -M: Dept-GELinuxNICDev@cavium.com +M: Rasesh Mody +M: Sudarsana Kalluru +M: GR-Linux-NIC-Dev@marvell.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/brocade/bna/ @@ -3471,10 +3471,9 @@ F: drivers/i2c/busses/i2c-octeon* F: drivers/i2c/busses/i2c-thunderx* CAVIUM LIQUIDIO NETWORK DRIVER -M: Derek Chickles -M: Satanand Burla -M: Felix Manlunas -M: Raghu Vatsavayi +M: Derek Chickles +M: Satanand Burla +M: Felix Manlunas L: netdev@vger.kernel.org W: http://www.cavium.com S: Supported @@ -3979,6 +3978,7 @@ F: drivers/cpufreq/arm_big_little.c CPU POWER MONITORING SUBSYSTEM M: Thomas Renninger M: Shuah Khan +M: Shuah Khan L: linux-pm@vger.kernel.org S: Maintained F: tools/power/cpupower/ @@ -4123,7 +4123,7 @@ S: Maintained F: drivers/media/dvb-frontends/cxd2820r* CXGB3 ETHERNET DRIVER (CXGB3) -M: Arjun Vynipadath +M: Vishal Kulkarni L: netdev@vger.kernel.org W: http://www.chelsio.com S: Supported @@ -4152,7 +4152,7 @@ S: Supported F: drivers/crypto/chelsio CXGB4 ETHERNET DRIVER (CXGB4) -M: Arjun Vynipadath +M: Vishal Kulkarni L: netdev@vger.kernel.org W: http://www.chelsio.com S: Supported @@ -6024,6 +6024,12 @@ L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/dma/fsldma.* +FREESCALE ENETC ETHERNET DRIVERS +M: Claudiu Manoil +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/freescale/enetc/ + FREESCALE eTSEC ETHERNET DRIVER (GIANFAR) M: Claudiu Manoil L: netdev@vger.kernel.org @@ -6088,6 +6094,7 @@ M: Yangbo Lu L: netdev@vger.kernel.org S: Maintained F: drivers/ptp/ptp_qoriq.c +F: drivers/ptp/ptp_qoriq_debugfs.c F: include/linux/fsl/ptp_qoriq.h F: Documentation/devicetree/bindings/ptp/ptp-qoriq.txt @@ -8259,6 +8266,7 @@ F: include/uapi/linux/sunrpc/ KERNEL SELFTEST FRAMEWORK M: Shuah Khan +M: Shuah Khan L: linux-kselftest@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git Q: https://patchwork.kernel.org/project/linux-kselftest/list/ @@ -10690,9 +10698,9 @@ S: Maintained F: drivers/net/netdevsim/* NETXEN (1/10) GbE SUPPORT -M: Manish Chopra -M: Rahul Verma -M: Dept-GELinuxNICDev@cavium.com +M: Manish Chopra +M: Rahul Verma +M: GR-Linux-NIC-Dev@marvell.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/qlogic/netxen/ @@ -12476,8 +12484,8 @@ S: Supported F: drivers/scsi/qedi/ QLOGIC QL4xxx ETHERNET DRIVER -M: Ariel Elior -M: everest-linux-l2@cavium.com +M: Ariel Elior +M: GR-everest-linux-l2@marvell.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/qlogic/qed/ @@ -12485,8 +12493,8 @@ F: include/linux/qed/ F: drivers/net/ethernet/qlogic/qede/ QLOGIC QL4xxx RDMA DRIVER -M: Michal Kalderon -M: Ariel Elior +M: Michal Kalderon +M: Ariel Elior L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/qedr/ @@ -12506,7 +12514,7 @@ F: Documentation/scsi/LICENSE.qla2xxx F: drivers/scsi/qla2xxx/ QLOGIC QLA3XXX NETWORK DRIVER -M: Dept-GELinuxNICDev@cavium.com +M: GR-Linux-NIC-Dev@marvell.com L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/qlogic/LICENSE.qla3xxx @@ -12520,16 +12528,16 @@ F: Documentation/scsi/LICENSE.qla4xxx F: drivers/scsi/qla4xxx/ QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER -M: Shahed Shaikh -M: Manish Chopra -M: Dept-GELinuxNICDev@cavium.com +M: Shahed Shaikh +M: Manish Chopra +M: GR-Linux-NIC-Dev@marvell.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/qlogic/qlcnic/ QLOGIC QLGE 10Gb ETHERNET DRIVER -M: Manish Chopra -M: Dept-GELinuxNICDev@cavium.com +M: Manish Chopra +M: GR-Linux-NIC-Dev@marvell.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/qlogic/qlge/ @@ -12609,6 +12617,14 @@ L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/qualcomm/emac/ +QUALCOMM ETHQOS ETHERNET DRIVER +M: Vinod Koul +M: Niklas Cassel +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +F: Documentation/devicetree/bindings/net/qcom,dwmac.txt + QUALCOMM GENERIC INTERFACE I2C DRIVER M: Alok Chauhan M: Karthikeyan Ramasubramanian @@ -15843,6 +15859,7 @@ F: drivers/usb/common/usb-otg-fsm.c USB OVER IP DRIVER M: Valentina Manea M: Shuah Khan +M: Shuah Khan L: linux-usb@vger.kernel.org S: Maintained F: Documentation/usb/usbip_protocol.txt @@ -16672,6 +16689,24 @@ T: git git://linuxtv.org/media_tree.git S: Maintained F: drivers/media/tuners/tuner-xc2028.* +XDP (eXpress Data Path) +M: Alexei Starovoitov +M: Daniel Borkmann +M: David S. Miller +M: Jakub Kicinski +M: Jesper Dangaard Brouer +M: John Fastabend +L: netdev@vger.kernel.org +L: xdp-newbies@vger.kernel.org +S: Supported +F: net/core/xdp.c +F: include/net/xdp.h +F: kernel/bpf/devmap.c +F: kernel/bpf/cpumap.c +F: include/trace/events/xdp.h +K: xdp +N: xdp + XDP SOCKETS (AF_XDP) M: Björn Töpel M: Magnus Karlsson diff --git a/Makefile b/Makefile index 499b96810995..141653226f3c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc4 NAME = Shy Crocodile # *DOCUMENTATION* @@ -955,6 +955,7 @@ ifdef CONFIG_STACK_VALIDATION endif endif +PHONY += prepare0 ifeq ($(KBUILD_EXTMOD),) core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ @@ -1061,8 +1062,7 @@ scripts: scripts_basic scripts_dtc # archprepare is used in arch Makefiles and when processed asm symlink, # version.h and scripts_basic is processed / created. -# Listed in dependency order -PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3 +PHONY += prepare archprepare prepare1 prepare2 prepare3 # prepare3 is used to check if we are building in a separate output directory, # and if so do: @@ -1360,11 +1360,11 @@ mrproper: rm-dirs := $(wildcard $(MRPROPER_DIRS)) mrproper: rm-files := $(wildcard $(MRPROPER_FILES)) mrproper-dirs := $(addprefix _mrproper_,scripts) -PHONY += $(mrproper-dirs) mrproper archmrproper +PHONY += $(mrproper-dirs) mrproper $(mrproper-dirs): $(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@) -mrproper: clean archmrproper $(mrproper-dirs) +mrproper: clean $(mrproper-dirs) $(call cmd,rmdirs) $(call cmd,rmfiles) diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index feed50ce89fa..caa270261521 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -3,23 +3,19 @@ generic-y += bugs.h generic-y += compat.h generic-y += device.h generic-y += div64.h -generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += extable.h -generic-y += fb.h generic-y += ftrace.h generic-y += hardirq.h generic-y += hw_irq.h generic-y += irq_regs.h generic-y += irq_work.h -generic-y += kmap_types.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += msi.h generic-y += parport.h -generic-y += pci.h generic-y += percpu.h generic-y += preempt.h generic-y += topology.h diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 49bfbd879caa..f1b86cef0905 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -216,6 +216,14 @@ struct bcr_fp_arcv2 { #endif }; +struct bcr_actionpoint { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:21, min:1, num:2, ver:8; +#else + unsigned int ver:8, num:2, min:1, pad:21; +#endif +}; + #include struct bcr_bpu_arcompact { @@ -283,7 +291,7 @@ struct cpuinfo_arc_cache { }; struct cpuinfo_arc_bpu { - unsigned int ver, full, num_cache, num_pred; + unsigned int ver, full, num_cache, num_pred, ret_stk; }; struct cpuinfo_arc_ccm { @@ -302,7 +310,7 @@ struct cpuinfo_arc { struct { unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2, fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4, - debug:1, ap:1, smart:1, rtt:1, pad3:4, + ap_num:4, ap_full:1, smart:1, rtt:1, pad3:1, timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4; } extn; struct bcr_mpy extn_mpy; diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index ee9246184033..202b74c339f0 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -340,7 +340,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x) /* * __ffs: Similar to ffs, but zero based (0-31) */ -static inline __attribute__ ((const)) int __ffs(unsigned long word) +static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word) { if (!word) return word; @@ -400,9 +400,9 @@ static inline __attribute__ ((const)) int ffs(unsigned long x) /* * __ffs: Similar to ffs, but zero based (0-31) */ -static inline __attribute__ ((const)) int __ffs(unsigned long x) +static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x) { - int n; + unsigned long n; asm volatile( " ffs.f %0, %1 \n" /* 0:31; 31(Z) if src 0 */ diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 9185541035cc..6958545390f0 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -103,7 +103,8 @@ static const char * const arc_pmu_ev_hw_map[] = { /* counts condition */ [PERF_COUNT_HW_INSTRUCTIONS] = "iall", - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */ + /* All jump instructions that are taken */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak", [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ #ifdef CONFIG_ISA_ARCV2 [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp", diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 8aec462d90fb..861a8aea51f9 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -1,15 +1,10 @@ -/* - * Linux performance counter support for ARC700 series - * - * Copyright (C) 2013-2015 Synopsys, Inc. (www.synopsys.com) - * - * This code is inspired by the perf support of various other architectures. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ +// SPDX-License-Identifier: GPL-2.0+ +// +// Linux performance counter support for ARC CPUs. +// This code is inspired by the perf support of various other architectures. +// +// Copyright (C) 2013-2018 Synopsys, Inc. (www.synopsys.com) + #include #include #include @@ -19,12 +14,31 @@ #include #include +/* HW holds 8 symbols + one for null terminator */ +#define ARCPMU_EVENT_NAME_LEN 9 + +enum arc_pmu_attr_groups { + ARCPMU_ATTR_GR_EVENTS, + ARCPMU_ATTR_GR_FORMATS, + ARCPMU_NR_ATTR_GR +}; + +struct arc_pmu_raw_event_entry { + char name[ARCPMU_EVENT_NAME_LEN]; +}; + struct arc_pmu { struct pmu pmu; unsigned int irq; int n_counters; + int n_events; u64 max_period; int ev_hw_idx[PERF_COUNT_ARC_HW_MAX]; + + struct arc_pmu_raw_event_entry *raw_entry; + struct attribute **attrs; + struct perf_pmu_events_attr *attr; + const struct attribute_group *attr_groups[ARCPMU_NR_ATTR_GR + 1]; }; struct arc_pmu_cpu { @@ -49,6 +63,7 @@ static int callchain_trace(unsigned int addr, void *data) { struct arc_callchain_trace *ctrl = data; struct perf_callchain_entry_ctx *entry = ctrl->perf_stuff; + perf_callchain_store(entry, addr); if (ctrl->depth++ < 3) @@ -57,8 +72,8 @@ static int callchain_trace(unsigned int addr, void *data) return -1; } -void -perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) { struct arc_callchain_trace ctrl = { .depth = 0, @@ -68,8 +83,8 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re arc_unwind_core(NULL, regs, callchain_trace, &ctrl); } -void -perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) { /* * User stack can't be unwound trivially with kernel dwarf unwinder @@ -82,10 +97,10 @@ static struct arc_pmu *arc_pmu; static DEFINE_PER_CPU(struct arc_pmu_cpu, arc_pmu_cpu); /* read counter #idx; note that counter# != event# on ARC! */ -static uint64_t arc_pmu_read_counter(int idx) +static u64 arc_pmu_read_counter(int idx) { - uint32_t tmp; - uint64_t result; + u32 tmp; + u64 result; /* * ARC supports making 'snapshots' of the counters, so we don't @@ -94,7 +109,7 @@ static uint64_t arc_pmu_read_counter(int idx) write_aux_reg(ARC_REG_PCT_INDEX, idx); tmp = read_aux_reg(ARC_REG_PCT_CONTROL); write_aux_reg(ARC_REG_PCT_CONTROL, tmp | ARC_REG_PCT_CONTROL_SN); - result = (uint64_t) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32; + result = (u64) (read_aux_reg(ARC_REG_PCT_SNAPH)) << 32; result |= read_aux_reg(ARC_REG_PCT_SNAPL); return result; @@ -103,9 +118,9 @@ static uint64_t arc_pmu_read_counter(int idx) static void arc_perf_event_update(struct perf_event *event, struct hw_perf_event *hwc, int idx) { - uint64_t prev_raw_count = local64_read(&hwc->prev_count); - uint64_t new_raw_count = arc_pmu_read_counter(idx); - int64_t delta = new_raw_count - prev_raw_count; + u64 prev_raw_count = local64_read(&hwc->prev_count); + u64 new_raw_count = arc_pmu_read_counter(idx); + s64 delta = new_raw_count - prev_raw_count; /* * We aren't afraid of hwc->prev_count changing beneath our feet @@ -155,7 +170,7 @@ static int arc_pmu_event_init(struct perf_event *event) int ret; if (!is_sampling_event(event)) { - hwc->sample_period = arc_pmu->max_period; + hwc->sample_period = arc_pmu->max_period; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); } @@ -192,6 +207,18 @@ static int arc_pmu_event_init(struct perf_event *event) pr_debug("init cache event with h/w %08x \'%s\'\n", (int)hwc->config, arc_pmu_ev_hw_map[ret]); return 0; + + case PERF_TYPE_RAW: + if (event->attr.config >= arc_pmu->n_events) + return -ENOENT; + + hwc->config |= event->attr.config; + pr_debug("init raw event with idx %lld \'%s\'\n", + event->attr.config, + arc_pmu->raw_entry[event->attr.config].name); + + return 0; + default: return -ENOENT; } @@ -200,7 +227,7 @@ static int arc_pmu_event_init(struct perf_event *event) /* starts all counters */ static void arc_pmu_enable(struct pmu *pmu) { - uint32_t tmp; + u32 tmp; tmp = read_aux_reg(ARC_REG_PCT_CONTROL); write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x1); } @@ -208,7 +235,7 @@ static void arc_pmu_enable(struct pmu *pmu) /* stops all counters */ static void arc_pmu_disable(struct pmu *pmu) { - uint32_t tmp; + u32 tmp; tmp = read_aux_reg(ARC_REG_PCT_CONTROL); write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x0); } @@ -228,7 +255,7 @@ static int arc_pmu_event_set_period(struct perf_event *event) local64_set(&hwc->period_left, left); hwc->last_period = period; overflow = 1; - } else if (unlikely(left <= 0)) { + } else if (unlikely(left <= 0)) { /* left underflowed by less than period. */ left += period; local64_set(&hwc->period_left, left); @@ -246,8 +273,8 @@ static int arc_pmu_event_set_period(struct perf_event *event) write_aux_reg(ARC_REG_PCT_INDEX, idx); /* Write value */ - write_aux_reg(ARC_REG_PCT_COUNTL, (u32)value); - write_aux_reg(ARC_REG_PCT_COUNTH, (value >> 32)); + write_aux_reg(ARC_REG_PCT_COUNTL, lower_32_bits(value)); + write_aux_reg(ARC_REG_PCT_COUNTH, upper_32_bits(value)); perf_event_update_userpage(event); @@ -277,7 +304,7 @@ static void arc_pmu_start(struct perf_event *event, int flags) /* Enable interrupt for this counter */ if (is_sampling_event(event)) write_aux_reg(ARC_REG_PCT_INT_CTRL, - read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx)); + read_aux_reg(ARC_REG_PCT_INT_CTRL) | BIT(idx)); /* enable ARC pmu here */ write_aux_reg(ARC_REG_PCT_INDEX, idx); /* counter # */ @@ -295,9 +322,9 @@ static void arc_pmu_stop(struct perf_event *event, int flags) * Reset interrupt flag by writing of 1. This is required * to make sure pending interrupt was not left. */ - write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx); + write_aux_reg(ARC_REG_PCT_INT_ACT, BIT(idx)); write_aux_reg(ARC_REG_PCT_INT_CTRL, - read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~(1 << idx)); + read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~BIT(idx)); } if (!(event->hw.state & PERF_HES_STOPPED)) { @@ -349,9 +376,10 @@ static int arc_pmu_add(struct perf_event *event, int flags) if (is_sampling_event(event)) { /* Mimic full counter overflow as other arches do */ - write_aux_reg(ARC_REG_PCT_INT_CNTL, (u32)arc_pmu->max_period); + write_aux_reg(ARC_REG_PCT_INT_CNTL, + lower_32_bits(arc_pmu->max_period)); write_aux_reg(ARC_REG_PCT_INT_CNTH, - (arc_pmu->max_period >> 32)); + upper_32_bits(arc_pmu->max_period)); } write_aux_reg(ARC_REG_PCT_CONFIG, 0); @@ -392,7 +420,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev) idx = __ffs(active_ints); /* Reset interrupt flag by writing of 1 */ - write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx); + write_aux_reg(ARC_REG_PCT_INT_ACT, BIT(idx)); /* * On reset of "interrupt active" bit corresponding @@ -400,7 +428,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev) * Now we need to re-enable interrupt for the counter. */ write_aux_reg(ARC_REG_PCT_INT_CTRL, - read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx)); + read_aux_reg(ARC_REG_PCT_INT_CTRL) | BIT(idx)); event = pmu_cpu->act_counter[idx]; hwc = &event->hw; @@ -414,7 +442,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev) arc_pmu_stop(event, 0); } - active_ints &= ~(1U << idx); + active_ints &= ~BIT(idx); } while (active_ints); done: @@ -441,19 +469,108 @@ static void arc_cpu_pmu_irq_init(void *data) write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff); } +/* Event field occupies the bottom 15 bits of our config field */ +PMU_FORMAT_ATTR(event, "config:0-14"); +static struct attribute *arc_pmu_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group arc_pmu_format_attr_gr = { + .name = "format", + .attrs = arc_pmu_format_attrs, +}; + +static ssize_t arc_pmu_events_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); +} + +/* + * We don't add attrs here as we don't have pre-defined list of perf events. + * We will generate and add attrs dynamically in probe() after we read HW + * configuration. + */ +static struct attribute_group arc_pmu_events_attr_gr = { + .name = "events", +}; + +static void arc_pmu_add_raw_event_attr(int j, char *str) +{ + memmove(arc_pmu->raw_entry[j].name, str, ARCPMU_EVENT_NAME_LEN - 1); + arc_pmu->attr[j].attr.attr.name = arc_pmu->raw_entry[j].name; + arc_pmu->attr[j].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(0444); + arc_pmu->attr[j].attr.show = arc_pmu_events_sysfs_show; + arc_pmu->attr[j].id = j; + arc_pmu->attrs[j] = &(arc_pmu->attr[j].attr.attr); +} + +static int arc_pmu_raw_alloc(struct device *dev) +{ + arc_pmu->attr = devm_kmalloc_array(dev, arc_pmu->n_events + 1, + sizeof(*arc_pmu->attr), GFP_KERNEL | __GFP_ZERO); + if (!arc_pmu->attr) + return -ENOMEM; + + arc_pmu->attrs = devm_kmalloc_array(dev, arc_pmu->n_events + 1, + sizeof(*arc_pmu->attrs), GFP_KERNEL | __GFP_ZERO); + if (!arc_pmu->attrs) + return -ENOMEM; + + arc_pmu->raw_entry = devm_kmalloc_array(dev, arc_pmu->n_events, + sizeof(*arc_pmu->raw_entry), GFP_KERNEL | __GFP_ZERO); + if (!arc_pmu->raw_entry) + return -ENOMEM; + + return 0; +} + +static inline bool event_in_hw_event_map(int i, char *name) +{ + if (!arc_pmu_ev_hw_map[i]) + return false; + + if (!strlen(arc_pmu_ev_hw_map[i])) + return false; + + if (strcmp(arc_pmu_ev_hw_map[i], name)) + return false; + + return true; +} + +static void arc_pmu_map_hw_event(int j, char *str) +{ + int i; + + /* See if HW condition has been mapped to a perf event_id */ + for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) { + if (event_in_hw_event_map(i, str)) { + pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n", + i, str, j); + arc_pmu->ev_hw_idx[i] = j; + } + } +} + static int arc_pmu_device_probe(struct platform_device *pdev) { struct arc_reg_pct_build pct_bcr; struct arc_reg_cc_build cc_bcr; - int i, j, has_interrupts; + int i, has_interrupts; int counter_size; /* in bits */ union cc_name { struct { - uint32_t word0, word1; + u32 word0, word1; char sentinel; } indiv; - char str[9]; + char str[ARCPMU_EVENT_NAME_LEN]; } cc_name; @@ -463,15 +580,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev) return -ENODEV; } BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32); - BUG_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS); + if (WARN_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS)) + return -EINVAL; READ_BCR(ARC_REG_CC_BUILD, cc_bcr); - BUG_ON(!cc_bcr.v); /* Counters exist but No countable conditions ? */ + if (WARN(!cc_bcr.v, "Counters exist but No countable conditions?")) + return -EINVAL; arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL); if (!arc_pmu) return -ENOMEM; + arc_pmu->n_events = cc_bcr.c; + + if (arc_pmu_raw_alloc(&pdev->dev)) + return -ENOMEM; + has_interrupts = is_isa_arcv2() ? pct_bcr.i : 0; arc_pmu->n_counters = pct_bcr.c; @@ -481,30 +605,26 @@ static int arc_pmu_device_probe(struct platform_device *pdev) pr_info("ARC perf\t: %d counters (%d bits), %d conditions%s\n", arc_pmu->n_counters, counter_size, cc_bcr.c, - has_interrupts ? ", [overflow IRQ support]":""); + has_interrupts ? ", [overflow IRQ support]" : ""); - cc_name.str[8] = 0; + cc_name.str[ARCPMU_EVENT_NAME_LEN - 1] = 0; for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++) arc_pmu->ev_hw_idx[i] = -1; /* loop thru all available h/w condition indexes */ - for (j = 0; j < cc_bcr.c; j++) { - write_aux_reg(ARC_REG_CC_INDEX, j); + for (i = 0; i < cc_bcr.c; i++) { + write_aux_reg(ARC_REG_CC_INDEX, i); cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); - /* See if it has been mapped to a perf event_id */ - for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) { - if (arc_pmu_ev_hw_map[i] && - !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) && - strlen(arc_pmu_ev_hw_map[i])) { - pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n", - i, cc_name.str, j); - arc_pmu->ev_hw_idx[i] = j; - } - } + arc_pmu_map_hw_event(i, cc_name.str); + arc_pmu_add_raw_event_attr(i, cc_name.str); } + arc_pmu_events_attr_gr.attrs = arc_pmu->attrs; + arc_pmu->attr_groups[ARCPMU_ATTR_GR_EVENTS] = &arc_pmu_events_attr_gr; + arc_pmu->attr_groups[ARCPMU_ATTR_GR_FORMATS] = &arc_pmu_format_attr_gr; + arc_pmu->pmu = (struct pmu) { .pmu_enable = arc_pmu_enable, .pmu_disable = arc_pmu_disable, @@ -514,6 +634,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev) .start = arc_pmu_start, .stop = arc_pmu_stop, .read = arc_pmu_read, + .attr_groups = arc_pmu->attr_groups, }; if (has_interrupts) { @@ -535,17 +656,19 @@ static int arc_pmu_device_probe(struct platform_device *pdev) } else arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - return perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW); + /* + * perf parser doesn't really like '-' symbol in events name, so let's + * use '_' in arc pct name as it goes to kernel PMU event prefix. + */ + return perf_pmu_register(&arc_pmu->pmu, "arc_pct", PERF_TYPE_RAW); } -#ifdef CONFIG_OF static const struct of_device_id arc_pmu_match[] = { { .compatible = "snps,arc700-pct" }, { .compatible = "snps,archs-pct" }, {}, }; MODULE_DEVICE_TABLE(of, arc_pmu_match); -#endif static struct platform_driver arc_pmu_driver = { .driver = { diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 2e018b8c2e19..feb90093e6b1 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -123,6 +123,7 @@ static void read_arc_build_cfg_regs(void) struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; const struct id_to_str *tbl; struct bcr_isa_arcv2 isa; + struct bcr_actionpoint ap; FIX_PTR(cpu); @@ -195,6 +196,7 @@ static void read_arc_build_cfg_regs(void) cpu->bpu.full = bpu.ft; cpu->bpu.num_cache = 256 << bpu.bce; cpu->bpu.num_pred = 2048 << bpu.pte; + cpu->bpu.ret_stk = 4 << bpu.rse; if (cpu->core.family >= 0x54) { unsigned int exec_ctrl; @@ -207,8 +209,11 @@ static void read_arc_build_cfg_regs(void) } } - READ_BCR(ARC_REG_AP_BCR, bcr); - cpu->extn.ap = bcr.ver ? 1 : 0; + READ_BCR(ARC_REG_AP_BCR, ap); + if (ap.ver) { + cpu->extn.ap_num = 2 << ap.num; + cpu->extn.ap_full = !!ap.min; + } READ_BCR(ARC_REG_SMART_BCR, bcr); cpu->extn.smart = bcr.ver ? 1 : 0; @@ -216,8 +221,6 @@ static void read_arc_build_cfg_regs(void) READ_BCR(ARC_REG_RTT_BCR, bcr); cpu->extn.rtt = bcr.ver ? 1 : 0; - cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt; - READ_BCR(ARC_REG_ISA_CFG_BCR, isa); /* some hacks for lack of feature BCR info in old ARC700 cores */ @@ -299,10 +302,10 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) if (cpu->bpu.ver) n += scnprintf(buf + n, len - n, - "BPU\t\t: %s%s match, cache:%d, Predict Table:%d", + "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d", IS_AVAIL1(cpu->bpu.full, "full"), IS_AVAIL1(!cpu->bpu.full, "partial"), - cpu->bpu.num_cache, cpu->bpu.num_pred); + cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk); if (is_isa_arcv2()) { struct bcr_lpb lpb; @@ -336,11 +339,17 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) IS_AVAIL1(cpu->extn.fpu_sp, "SP "), IS_AVAIL1(cpu->extn.fpu_dp, "DP ")); - if (cpu->extn.debug) - n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s%s\n", - IS_AVAIL1(cpu->extn.ap, "ActionPoint "), + if (cpu->extn.ap_num | cpu->extn.smart | cpu->extn.rtt) { + n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s", IS_AVAIL1(cpu->extn.smart, "smaRT "), IS_AVAIL1(cpu->extn.rtt, "RTT ")); + if (cpu->extn.ap_num) { + n += scnprintf(buf + n, len - n, "ActionPoint %d/%s", + cpu->extn.ap_num, + cpu->extn.ap_full ? "full":"min"); + } + n += scnprintf(buf + n, len - n, "\n"); + } if (cpu->dccm.sz || cpu->iccm.sz) n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n", diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index e8d9fb452346..215f515442e0 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -18,6 +18,8 @@ #include #include +#define ARC_PATH_MAX 256 + /* * Common routine to print scratch regs (r0-r12) or callee regs (r13-r25) * -Prints 3 regs per line and a CR. @@ -58,11 +60,12 @@ static void show_callee_regs(struct callee_regs *cregs) print_reg_file(&(cregs->r13), 13); } -static void print_task_path_n_nm(struct task_struct *tsk, char *buf) +static void print_task_path_n_nm(struct task_struct *tsk) { char *path_nm = NULL; struct mm_struct *mm; struct file *exe_file; + char buf[ARC_PATH_MAX]; mm = get_task_mm(tsk); if (!mm) @@ -72,7 +75,7 @@ static void print_task_path_n_nm(struct task_struct *tsk, char *buf) mmput(mm); if (exe_file) { - path_nm = file_path(exe_file, buf, 255); + path_nm = file_path(exe_file, buf, ARC_PATH_MAX-1); fput(exe_file); } @@ -80,10 +83,9 @@ done: pr_info("Path: %s\n", !IS_ERR(path_nm) ? path_nm : "?"); } -static void show_faulting_vma(unsigned long address, char *buf) +static void show_faulting_vma(unsigned long address) { struct vm_area_struct *vma; - char *nm = buf; struct mm_struct *active_mm = current->active_mm; /* can't use print_vma_addr() yet as it doesn't check for @@ -96,8 +98,11 @@ static void show_faulting_vma(unsigned long address, char *buf) * if the container VMA is not found */ if (vma && (vma->vm_start <= address)) { + char buf[ARC_PATH_MAX]; + char *nm = "?"; + if (vma->vm_file) { - nm = file_path(vma->vm_file, buf, PAGE_SIZE - 1); + nm = file_path(vma->vm_file, buf, ARC_PATH_MAX-1); if (IS_ERR(nm)) nm = "?"; } @@ -173,13 +178,14 @@ void show_regs(struct pt_regs *regs) { struct task_struct *tsk = current; struct callee_regs *cregs; - char *buf; - buf = (char *)__get_free_page(GFP_KERNEL); - if (!buf) - return; + /* + * generic code calls us with preemption disabled, but some calls + * here could sleep, so re-enable to avoid lockdep splat + */ + preempt_enable(); - print_task_path_n_nm(tsk, buf); + print_task_path_n_nm(tsk); show_regs_print_info(KERN_INFO); show_ecr_verbose(regs); @@ -189,7 +195,7 @@ void show_regs(struct pt_regs *regs) (void *)regs->blink, (void *)regs->ret); if (user_mode(regs)) - show_faulting_vma(regs->ret, buf); /* faulting code, not data */ + show_faulting_vma(regs->ret); /* faulting code, not data */ pr_info("[STAT32]: 0x%08lx", regs->status32); @@ -222,7 +228,7 @@ void show_regs(struct pt_regs *regs) if (cregs) show_callee_regs(cregs); - free_page((unsigned long)buf); + preempt_disable(); } void show_kernel_fault_diag(const char *str, struct pt_regs *regs, diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S index 62ad4bcb841a..f230bb7092fd 100644 --- a/arch/arc/lib/memset-archs.S +++ b/arch/arc/lib/memset-archs.S @@ -7,11 +7,39 @@ */ #include +#include -#undef PREALLOC_NOT_AVAIL +/* + * The memset implementation below is optimized to use prefetchw and prealloc + * instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6) + * If you want to implement optimized memset for other possible L1 data cache + * line lengths (32B and 128B) you should rewrite code carefully checking + * we don't call any prefetchw/prealloc instruction for L1 cache lines which + * don't belongs to memset area. + */ + +#if L1_CACHE_SHIFT == 6 + +.macro PREALLOC_INSTR reg, off + prealloc [\reg, \off] +.endm + +.macro PREFETCHW_INSTR reg, off + prefetchw [\reg, \off] +.endm + +#else + +.macro PREALLOC_INSTR +.endm + +.macro PREFETCHW_INSTR +.endm + +#endif ENTRY_CFI(memset) - prefetchw [r0] ; Prefetch the write location + PREFETCHW_INSTR r0, 0 ; Prefetch the first write location mov.f 0, r2 ;;; if size is zero jz.d [blink] @@ -48,11 +76,8 @@ ENTRY_CFI(memset) lpnz @.Lset64bytes ;; LOOP START -#ifdef PREALLOC_NOT_AVAIL - prefetchw [r3, 64] ;Prefetch the next write location -#else - prealloc [r3, 64] -#endif + PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching + #ifdef CONFIG_ARC_HAS_LL64 std.ab r4, [r3, 8] std.ab r4, [r3, 8] @@ -85,7 +110,6 @@ ENTRY_CFI(memset) lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes lpnz .Lset32bytes ;; LOOP START - prefetchw [r3, 32] ;Prefetch the next write location #ifdef CONFIG_ARC_HAS_LL64 std.ab r4, [r3, 8] std.ab r4, [r3, 8] diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index a1d723197084..8df1638259f3 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -141,12 +141,17 @@ good_area: */ fault = handle_mm_fault(vma, address, flags); - /* If Pagefault was interrupted by SIGKILL, exit page fault "early" */ if (fatal_signal_pending(current)) { - if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY)) - up_read(&mm->mmap_sem); - if (user_mode(regs)) + + /* + * if fault retry, mmap_sem already relinquished by core mm + * so OK to return to user mode (with signal handled first) + */ + if (fault & VM_FAULT_RETRY) { + if (!user_mode(regs)) + goto no_context; return; + } } perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 43bf4c3a1290..e1ab2d7f1d64 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -119,7 +119,8 @@ void __init setup_arch_memory(void) */ memblock_add_node(low_mem_start, low_mem_sz, 0); - memblock_reserve(low_mem_start, __pa(_end) - low_mem_start); + memblock_reserve(CONFIG_LINUX_LINK_BASE, + __pa(_end) - CONFIG_LINUX_LINK_BASE); #ifdef CONFIG_BLK_DEV_INITRD if (phys_initrd_size) { diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index ed0941292172..ad75959b99c1 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -706,6 +706,7 @@ fsl,tmr-fiper1 = <999999995>; fsl,tmr-fiper2 = <99990>; fsl,max-adj = <499999999>; + fsl,extts-fifo; }; enet0: ethernet@2d10000 { diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h index b3ef061d8b74..2c403e7c782d 100644 --- a/arch/arm/include/asm/xen/page-coherent.h +++ b/arch/arm/include/asm/xen/page-coherent.h @@ -1 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H +#define _ASM_ARM_XEN_PAGE_COHERENT_H + +#include +#include #include + +static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev) +{ + if (dev && dev->archdata.dev_dma_ops) + return dev->archdata.dev_dma_ops; + return get_arch_dma_ops(NULL); +} + +static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) +{ + return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); +} + +static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) +{ + xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); +} + +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, + dma_addr_t dev_addr, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + unsigned long page_pfn = page_to_xen_pfn(page); + unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); + unsigned long compound_pages = + (1<map_page(hwdev, page, offset, size, dir, attrs); + else + __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); +} + +static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + unsigned long pfn = PFN_DOWN(handle); + /* + * Dom0 is mapped 1:1, while the Linux page can be spanned accross + * multiple Xen page, it's not possible to have a mix of local and + * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a + * foreign mfn will always return false. If the page is local we can + * safely call the native dma_ops function, otherwise we call the xen + * specific function. + */ + if (pfn_valid(pfn)) { + if (xen_get_dma_ops(hwdev)->unmap_page) + xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); + } else + __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); +} + +static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(handle); + if (pfn_valid(pfn)) { + if (xen_get_dma_ops(hwdev)->sync_single_for_cpu) + xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); + } else + __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); +} + +static inline void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(handle); + if (pfn_valid(pfn)) { + if (xen_get_dma_ops(hwdev)->sync_single_for_device) + xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); + } else + __xen_dma_sync_single_for_device(hwdev, handle, size, dir); +} + +#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */ diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 25b3ee85066e..c8bfbbfdfcc3 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1083,12 +1083,17 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, /* Arithmatic Operation */ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, - const u8 rn, struct jit_ctx *ctx, u8 op) { + const u8 rn, struct jit_ctx *ctx, u8 op, + bool is_jmp64) { switch (op) { case BPF_JSET: - emit(ARM_AND_R(ARM_IP, rt, rn), ctx); - emit(ARM_AND_R(ARM_LR, rd, rm), ctx); - emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); + if (is_jmp64) { + emit(ARM_AND_R(ARM_IP, rt, rn), ctx); + emit(ARM_AND_R(ARM_LR, rd, rm), ctx); + emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); + } else { + emit(ARM_ANDS_R(ARM_IP, rt, rn), ctx); + } break; case BPF_JEQ: case BPF_JNE: @@ -1096,18 +1101,25 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, case BPF_JGE: case BPF_JLE: case BPF_JLT: - emit(ARM_CMP_R(rd, rm), ctx); - _emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx); + if (is_jmp64) { + emit(ARM_CMP_R(rd, rm), ctx); + /* Only compare low halve if high halve are equal. */ + _emit(ARM_COND_EQ, ARM_CMP_R(rt, rn), ctx); + } else { + emit(ARM_CMP_R(rt, rn), ctx); + } break; case BPF_JSLE: case BPF_JSGT: emit(ARM_CMP_R(rn, rt), ctx); - emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx); + if (is_jmp64) + emit(ARM_SBCS_R(ARM_IP, rm, rd), ctx); break; case BPF_JSLT: case BPF_JSGE: emit(ARM_CMP_R(rt, rn), ctx); - emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx); + if (is_jmp64) + emit(ARM_SBCS_R(ARM_IP, rd, rm), ctx); break; } } @@ -1615,6 +1627,17 @@ exit: case BPF_JMP | BPF_JLT | BPF_X: case BPF_JMP | BPF_JSLT | BPF_X: case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: /* Setup source registers */ rm = arm_bpf_get_reg32(src_hi, tmp2[0], ctx); rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); @@ -1641,6 +1664,17 @@ exit: case BPF_JMP | BPF_JLE | BPF_K: case BPF_JMP | BPF_JSLT | BPF_K: case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: if (off == 0) break; rm = tmp2[0]; @@ -1652,7 +1686,8 @@ go_jmp: rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Check for the condition */ - emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code)); + emit_ar_r(rd[0], rd[1], rm, rn, ctx, BPF_OP(code), + BPF_CLASS(code) == BPF_JMP); /* Setup JUMP instruction */ jmp_offset = bpf2a32_offset(i+off, i, ctx); diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index f4e58bcdaa43..13a05f759552 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -62,6 +62,7 @@ #define ARM_INST_ADDS_I 0x02900000 #define ARM_INST_AND_R 0x00000000 +#define ARM_INST_ANDS_R 0x00100000 #define ARM_INST_AND_I 0x02000000 #define ARM_INST_BIC_R 0x01c00000 @@ -172,6 +173,7 @@ #define ARM_ADC_I(rd, rn, imm) _AL3_I(ARM_INST_ADC, rd, rn, imm) #define ARM_AND_R(rd, rn, rm) _AL3_R(ARM_INST_AND, rd, rn, rm) +#define ARM_ANDS_R(rd, rn, rm) _AL3_R(ARM_INST_ANDS, rd, rn, rm) #define ARM_AND_I(rd, rn, imm) _AL3_I(ARM_INST_AND, rd, rn, imm) #define ARM_BIC_R(rd, rn, rm) _AL3_R(ARM_INST_BIC, rd, rn, rm) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index eb43e09c1980..926434f413fa 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -60,8 +60,6 @@ #ifdef CONFIG_KASAN_SW_TAGS #define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) -#else -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 3dd3d664c5c5..4658c937e173 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -20,9 +20,6 @@ struct dev_archdata { #ifdef CONFIG_IOMMU_API void *iommu; /* private IOMMU data */ #endif -#ifdef CONFIG_XEN - const struct dma_map_ops *dev_dma_ops; -#endif }; struct pdev_archdata { diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index ac352accb3d9..3e8063f4f9d3 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -60,8 +60,11 @@ static inline bool arm64_kernel_use_ng_mappings(void) * later determine that kpti is required, then * kpti_install_ng_mappings() will make them non-global. */ + if (arm64_kernel_unmapped_at_el0()) + return true; + if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - return arm64_kernel_unmapped_at_el0(); + return false; /* * KASLR is enabled so we're going to be enabling kpti on non-broken diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h index b3ef061d8b74..d88e56b90b93 100644 --- a/arch/arm64/include/asm/xen/page-coherent.h +++ b/arch/arm64/include/asm/xen/page-coherent.h @@ -1 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H +#define _ASM_ARM64_XEN_PAGE_COHERENT_H + +#include +#include #include + +static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) +{ + return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); +} + +static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) +{ + dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs); +} + +static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(handle); + + if (pfn_valid(pfn)) + dma_direct_sync_single_for_cpu(hwdev, handle, size, dir); + else + __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); +} + +static inline void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(handle); + if (pfn_valid(pfn)) + dma_direct_sync_single_for_device(hwdev, handle, size, dir); + else + __xen_dma_sync_single_for_device(hwdev, handle, size, dir); +} + +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, + dma_addr_t dev_addr, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + unsigned long page_pfn = page_to_xen_pfn(page); + unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); + unsigned long compound_pages = + (1< #include +#include #include #include #include @@ -43,7 +44,7 @@ static __init u64 get_kaslr_seed(void *fdt) return ret; } -static __init const u8 *get_cmdline(void *fdt) +static __init const u8 *kaslr_get_cmdline(void *fdt) { static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE; @@ -109,7 +110,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * Check if 'nokaslr' appears on the command line, and * return 0 if that is the case. */ - cmdline = get_cmdline(fdt); + cmdline = kaslr_get_cmdline(fdt); str = strstr(cmdline, "nokaslr"); if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) return 0; @@ -169,5 +170,8 @@ u64 __init kaslr_early_init(u64 dt_phys) module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; module_alloc_base &= PAGE_MASK; + __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); + __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed)); + return offset; } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index fb0908456a1f..78c0a72f822c 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -466,9 +466,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, __iommu_setup_dma_ops(dev, dma_base, size, iommu); #ifdef CONFIG_XEN - if (xen_initial_domain()) { - dev->archdata.dev_dma_ops = dev->dma_ops; + if (xen_initial_domain()) dev->dma_ops = xen_dma_ops; - } #endif } diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 1542df00b23c..aaddc0217e73 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -362,7 +362,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, const s16 off = insn->off; const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; - const bool is64 = BPF_CLASS(code) == BPF_ALU64; + const bool is64 = BPF_CLASS(code) == BPF_ALU64 || + BPF_CLASS(code) == BPF_JMP; const bool isdw = BPF_SIZE(code) == BPF_DW; u8 jmp_cond; s32 jmp_offset; @@ -559,7 +560,17 @@ emit_bswap_uxt: case BPF_JMP | BPF_JSLT | BPF_X: case BPF_JMP | BPF_JSGE | BPF_X: case BPF_JMP | BPF_JSLE | BPF_X: - emit(A64_CMP(1, dst, src), ctx); + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + emit(A64_CMP(is64, dst, src), ctx); emit_cond_jmp: jmp_offset = bpf2a64_offset(i + off, i, ctx); check_imm19(jmp_offset); @@ -601,7 +612,8 @@ emit_cond_jmp: emit(A64_B_(jmp_cond, jmp_offset), ctx); break; case BPF_JMP | BPF_JSET | BPF_X: - emit(A64_TST(1, dst, src), ctx); + case BPF_JMP32 | BPF_JSET | BPF_X: + emit(A64_TST(is64, dst, src), ctx); goto emit_cond_jmp; /* IF (dst COND imm) JUMP off */ case BPF_JMP | BPF_JEQ | BPF_K: @@ -614,12 +626,23 @@ emit_cond_jmp: case BPF_JMP | BPF_JSLT | BPF_K: case BPF_JMP | BPF_JSGE | BPF_K: case BPF_JMP | BPF_JSLE | BPF_K: - emit_a64_mov_i(1, tmp, imm, ctx); - emit(A64_CMP(1, dst, tmp), ctx); + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_CMP(is64, dst, tmp), ctx); goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_K: - emit_a64_mov_i(1, tmp, imm, ctx); - emit(A64_TST(1, dst, tmp), ctx); + case BPF_JMP32 | BPF_JSET | BPF_K: + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_TST(is64, dst, tmp), ctx); goto emit_cond_jmp; /* function call */ case BPF_JMP | BPF_CALL: diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile index 4003ddc616e1..f801f3708a89 100644 --- a/arch/h8300/Makefile +++ b/arch/h8300/Makefile @@ -37,8 +37,6 @@ libs-y += arch/$(ARCH)/lib/ boot := arch/h8300/boot -archmrproper: - archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 320d86f192ee..171290f9f1de 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -16,8 +16,6 @@ KBUILD_DEFCONFIG := generic_defconfig NM := $(CROSS_COMPILE)nm -B READELF := $(CROSS_COMPILE)readelf -export AWK - CHECKFLAGS += -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__ OBJCOPYFLAGS := --strip-all diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 787290781b8c..0d14f51d0002 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -3155,6 +3155,7 @@ config MIPS32_O32 config MIPS32_N32 bool "Kernel support for n32 binaries" depends on 64BIT + select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select COMPAT select MIPS32_COMPAT select SYSVIPC_COMPAT if SYSVIPC diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 6054d49e608e..fe3773539eff 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -173,6 +173,31 @@ void __init plat_mem_setup(void) pm_power_off = bcm47xx_machine_halt; } +#ifdef CONFIG_BCM47XX_BCMA +static struct device * __init bcm47xx_setup_device(void) +{ + struct device *dev; + int err; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return NULL; + + err = dev_set_name(dev, "bcm47xx_soc"); + if (err) { + pr_err("Failed to set SoC device name: %d\n", err); + kfree(dev); + return NULL; + } + + err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (err) + pr_err("Failed to set SoC DMA mask: %d\n", err); + + return dev; +} +#endif + /* * This finishes bus initialization doing things that were not possible without * kmalloc. Make sure to call it late enough (after mm_init). @@ -183,6 +208,10 @@ void __init bcm47xx_bus_setup(void) if (bcm47xx_bus_type == BCM47XX_BUS_TYPE_BCMA) { int err; + bcm47xx_bus.bcma.dev = bcm47xx_setup_device(); + if (!bcm47xx_bus.bcma.dev) + panic("Failed to setup SoC device\n"); + err = bcma_host_soc_init(&bcm47xx_bus.bcma); if (err) panic("Failed to initialize BCMA bus (err %d)", err); @@ -235,6 +264,8 @@ static int __init bcm47xx_register_bus_complete(void) #endif #ifdef CONFIG_BCM47XX_BCMA case BCM47XX_BUS_TYPE_BCMA: + if (device_register(bcm47xx_bus.bcma.dev)) + pr_err("Failed to register SoC device\n"); bcma_bus_register(&bcm47xx_bus.bcma.bus); break; #endif diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 2c79ab52977a..8bf43c5a7bc7 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -98,7 +98,7 @@ static void octeon_kexec_smp_down(void *ignored) " sync \n" " synci ($0) \n"); - relocated_kexec_smp_wait(NULL); + kexec_reboot(); } #endif diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig index 4e4ec779f182..6f981af67826 100644 --- a/arch/mips/configs/ath79_defconfig +++ b/arch/mips/configs/ath79_defconfig @@ -66,6 +66,7 @@ CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_SERIAL_8250_PCI is not set CONFIG_SERIAL_8250_NR_UARTS=1 CONFIG_SERIAL_8250_RUNTIME_UARTS=1 +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AR933X=y CONFIG_SERIAL_AR933X_CONSOLE=y # CONFIG_HW_RANDOM is not set diff --git a/arch/mips/include/asm/mach-lantiq/falcon/falcon_irq.h b/arch/mips/include/asm/mach-lantiq/falcon/falcon_irq.h index c6b63a409641..6dd8ad2409dc 100644 --- a/arch/mips/include/asm/mach-lantiq/falcon/falcon_irq.h +++ b/arch/mips/include/asm/mach-lantiq/falcon/falcon_irq.h @@ -18,8 +18,6 @@ #define INT_NUM_EXTRA_START (INT_NUM_IM4_IRL0 + 32) #define INT_NUM_IM_OFFSET (INT_NUM_IM1_IRL0 - INT_NUM_IM0_IRL0) -#define MIPS_CPU_TIMER_IRQ 7 - #define MAX_IM 5 #endif /* _FALCON_IRQ__ */ diff --git a/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h b/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h index 141076325307..0b424214a5e9 100644 --- a/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h +++ b/arch/mips/include/asm/mach-lantiq/xway/lantiq_irq.h @@ -19,8 +19,6 @@ #define LTQ_DMA_CH0_INT (INT_NUM_IM2_IRL0) -#define MIPS_CPU_TIMER_IRQ 7 - #define MAX_IM 5 #endif diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 6256d35dbf4d..bedb5047aff3 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -74,14 +74,15 @@ static int __init vdma_init(void) get_order(VDMA_PGTBL_SIZE)); BUG_ON(!pgtbl); dma_cache_wback_inv((unsigned long)pgtbl, VDMA_PGTBL_SIZE); - pgtbl = (VDMA_PGTBL_ENTRY *)KSEG1ADDR(pgtbl); + pgtbl = (VDMA_PGTBL_ENTRY *)CKSEG1ADDR((unsigned long)pgtbl); /* * Clear the R4030 translation table */ vdma_pgtbl_init(); - r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE, CPHYSADDR(pgtbl)); + r4030_write_reg32(JAZZ_R4030_TRSTBL_BASE, + CPHYSADDR((unsigned long)pgtbl)); r4030_write_reg32(JAZZ_R4030_TRSTBL_LIM, VDMA_PGTBL_SIZE); r4030_write_reg32(JAZZ_R4030_TRSTBL_INV, 0); diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index f0bc3312ed11..6549499eb202 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -224,9 +224,11 @@ static struct irq_chip ltq_eiu_type = { .irq_set_type = ltq_eiu_settype, }; -static void ltq_hw_irqdispatch(int module) +static void ltq_hw_irq_handler(struct irq_desc *desc) { + int module = irq_desc_get_irq(desc) - 2; u32 irq; + int hwirq; irq = ltq_icu_r32(module, LTQ_ICU_IM0_IOSR); if (irq == 0) @@ -237,7 +239,8 @@ static void ltq_hw_irqdispatch(int module) * other bits might be bogus */ irq = __fls(irq); - do_IRQ((int)irq + MIPS_CPU_IRQ_CASCADE + (INT_NUM_IM_OFFSET * module)); + hwirq = irq + MIPS_CPU_IRQ_CASCADE + (INT_NUM_IM_OFFSET * module); + generic_handle_irq(irq_linear_revmap(ltq_domain, hwirq)); /* if this is a EBU irq, we need to ack it or get a deadlock */ if ((irq == LTQ_ICU_EBU_IRQ) && (module == 0) && LTQ_EBU_PCC_ISTAT) @@ -245,49 +248,6 @@ static void ltq_hw_irqdispatch(int module) LTQ_EBU_PCC_ISTAT); } -#define DEFINE_HWx_IRQDISPATCH(x) \ - static void ltq_hw ## x ## _irqdispatch(void) \ - { \ - ltq_hw_irqdispatch(x); \ - } -DEFINE_HWx_IRQDISPATCH(0) -DEFINE_HWx_IRQDISPATCH(1) -DEFINE_HWx_IRQDISPATCH(2) -DEFINE_HWx_IRQDISPATCH(3) -DEFINE_HWx_IRQDISPATCH(4) - -#if MIPS_CPU_TIMER_IRQ == 7 -static void ltq_hw5_irqdispatch(void) -{ - do_IRQ(MIPS_CPU_TIMER_IRQ); -} -#else -DEFINE_HWx_IRQDISPATCH(5) -#endif - -static void ltq_hw_irq_handler(struct irq_desc *desc) -{ - ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2); -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; - int irq; - - if (!pending) { - spurious_interrupt(); - return; - } - - pending >>= CAUSEB_IP; - while (pending) { - irq = fls(pending) - 1; - do_IRQ(MIPS_CPU_IRQ_BASE + irq); - pending &= ~BIT(irq); - } -} - static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { struct irq_chip *chip = <q_irq_type; @@ -343,38 +303,13 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent) for (i = 0; i < MAX_IM; i++) irq_set_chained_handler(i + 2, ltq_hw_irq_handler); - if (cpu_has_vint) { - pr_info("Setting up vectored interrupts\n"); - set_vi_handler(2, ltq_hw0_irqdispatch); - set_vi_handler(3, ltq_hw1_irqdispatch); - set_vi_handler(4, ltq_hw2_irqdispatch); - set_vi_handler(5, ltq_hw3_irqdispatch); - set_vi_handler(6, ltq_hw4_irqdispatch); - set_vi_handler(7, ltq_hw5_irqdispatch); - } - ltq_domain = irq_domain_add_linear(node, (MAX_IM * INT_NUM_IM_OFFSET) + MIPS_CPU_IRQ_CASCADE, &irq_domain_ops, 0); -#ifndef CONFIG_MIPS_MT_SMP - set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 | - IE_IRQ3 | IE_IRQ4 | IE_IRQ5); -#else - set_c0_status(IE_SW0 | IE_SW1 | IE_IRQ0 | IE_IRQ1 | - IE_IRQ2 | IE_IRQ3 | IE_IRQ4 | IE_IRQ5); -#endif - /* tell oprofile which irq to use */ ltq_perfcount_irq = irq_create_mapping(ltq_domain, LTQ_PERF_IRQ); - /* - * if the timer irq is not one of the mips irqs we need to - * create a mapping - */ - if (MIPS_CPU_TIMER_IRQ != 7) - irq_create_mapping(ltq_domain, MIPS_CPU_TIMER_IRQ); - /* the external interrupts are optional and xway only */ eiu_node = of_find_compatible_node(NULL, NULL, "lantiq,eiu-xway"); if (eiu_node && !of_address_to_resource(eiu_node, 0, &res)) { @@ -411,7 +346,7 @@ EXPORT_SYMBOL_GPL(get_c0_perfcount_int); unsigned int get_c0_compare_int(void) { - return MIPS_CPU_TIMER_IRQ; + return CP0_LEGACY_COMPARE_IRQ; } static struct of_device_id __initdata of_irq_ids[] = { diff --git a/arch/mips/pci/msi-octeon.c b/arch/mips/pci/msi-octeon.c index 2a5bb849b10e..288b58b00dc8 100644 --- a/arch/mips/pci/msi-octeon.c +++ b/arch/mips/pci/msi-octeon.c @@ -369,7 +369,9 @@ int __init octeon_msi_initialize(void) int irq; struct irq_chip *msi; - if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_PCIE) { + if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_INVALID) { + return 0; + } else if (octeon_dma_bar_type == OCTEON_DMA_BAR_TYPE_PCIE) { msi_rcv_reg[0] = CVMX_PEXP_NPEI_MSI_RCV0; msi_rcv_reg[1] = CVMX_PEXP_NPEI_MSI_RCV1; msi_rcv_reg[2] = CVMX_PEXP_NPEI_MSI_RCV2; diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile index 0a935c136ec2..ac3482882cf9 100644 --- a/arch/nds32/Makefile +++ b/arch/nds32/Makefile @@ -3,9 +3,6 @@ OBJCOPYFLAGS := -O binary -R .note -R .note.gnu.build-id -R .comment -S KBUILD_DEFCONFIG := defconfig -comma = , - - ifdef CONFIG_FUNCTION_TRACER arch-y += -malways-save-lp -mno-relax endif @@ -54,8 +51,6 @@ endif boot := arch/nds32/boot core-y += $(boot)/dts/ -.PHONY: FORCE - Image: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ @@ -68,9 +63,6 @@ prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/nds32/kernel/vdso include/generated/vdso-offsets.h -CLEAN_FILES += include/asm-nds32/constants.h* - -# We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/openrisc/Makefile b/arch/openrisc/Makefile index 70e06d34006c..bf10141c7426 100644 --- a/arch/openrisc/Makefile +++ b/arch/openrisc/Makefile @@ -20,7 +20,6 @@ KBUILD_DEFCONFIG := or1ksim_defconfig OBJCOPYFLAGS := -O binary -R .note -R .comment -S -LDFLAGS_vmlinux := LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) KBUILD_CFLAGS += -pipe -ffixed-r10 -D__linux__ @@ -50,5 +49,3 @@ else BUILTIN_DTB := n endif core-$(BUILTIN_DTB) += arch/openrisc/boot/dts/ - -all: vmlinux diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 19a8834e0398..f9513ad38fa6 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -337,6 +337,7 @@ #define PPC_INST_DIVWU 0x7c000396 #define PPC_INST_DIVD 0x7c0003d2 #define PPC_INST_RLWINM 0x54000000 +#define PPC_INST_RLWINM_DOT 0x54000001 #define PPC_INST_RLWIMI 0x50000000 #define PPC_INST_RLDICL 0x78000000 #define PPC_INST_RLDICR 0x78000004 diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h index ff91192407d1..f599064dd8dc 100644 --- a/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/arch/powerpc/include/uapi/asm/perf_regs.h @@ -47,6 +47,7 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_DAR, PERF_REG_POWERPC_DSISR, PERF_REG_POWERPC_SIER, + PERF_REG_POWERPC_MMCRA, PERF_REG_POWERPC_MAX, }; #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 57deb1e9ffea..20cc816b3508 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -852,11 +852,12 @@ start_here: /* set up the PTE pointers for the Abatron bdiGDB. */ - tovirt(r6,r6) lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l stw r5, 0xf0(0) /* Must match your Abatron config file */ tophys(r5,r5) + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l stw r6, 0(r5) /* Now turn on the MMU for real! */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index bd5e6834ca69..6794466f6420 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -755,11 +755,12 @@ SYSCALL_DEFINE0(rt_sigreturn) if (restore_tm_sigcontexts(current, &uc->uc_mcontext, &uc_transact->uc_mcontext)) goto badframe; - } + } else #endif - /* Fall through, for non-TM restore */ - if (!MSR_TM_ACTIVE(msr)) { + { /* + * Fall through, for non-TM restore + * * Unset MSR[TS] on the thread regs since MSR from user * context does not have MSR active, and recheckpoint was * not called since restore_tm_sigcontexts() was not called diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 29746dc28df5..517662a56bdc 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -967,13 +967,6 @@ out: } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) -unsigned long __init arch_syscall_addr(int nr) -{ - return sys_call_table[nr*2]; -} -#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */ - #ifdef PPC64_ELF_ABI_v1 char *arch_ftrace_match_adjust(char *str, const char *search) { diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index c2d5192ed64f..549e9490ff2a 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -165,6 +165,10 @@ #define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \ ___PPC_RS(a) | __PPC_SH(i) | \ __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RLWINM_DOT(d, a, i, mb, me) EMIT(PPC_INST_RLWINM_DOT | \ + ___PPC_RA(d) | ___PPC_RS(a) | \ + __PPC_SH(i) | __PPC_MB(mb) | \ + __PPC_ME(me)) #define PPC_RLWIMI(d, a, i, mb, me) EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \ ___PPC_RS(a) | __PPC_SH(i) | \ __PPC_MB(mb) | __PPC_ME(me)) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 7ce57657d3b8..15bba765fa79 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -768,36 +768,58 @@ emit_clear: case BPF_JMP | BPF_JGT | BPF_X: case BPF_JMP | BPF_JSGT | BPF_K: case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_X: true_cond = COND_GT; goto cond_branch; case BPF_JMP | BPF_JLT | BPF_K: case BPF_JMP | BPF_JLT | BPF_X: case BPF_JMP | BPF_JSLT | BPF_K: case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_X: true_cond = COND_LT; goto cond_branch; case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP | BPF_JSGE | BPF_K: case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_X: true_cond = COND_GE; goto cond_branch; case BPF_JMP | BPF_JLE | BPF_K: case BPF_JMP | BPF_JLE | BPF_X: case BPF_JMP | BPF_JSLE | BPF_K: case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_X: true_cond = COND_LE; goto cond_branch; case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_X: true_cond = COND_EQ; goto cond_branch; case BPF_JMP | BPF_JNE | BPF_K: case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_X: true_cond = COND_NE; goto cond_branch; case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_X: true_cond = COND_NE; /* Fall through */ @@ -809,18 +831,44 @@ cond_branch: case BPF_JMP | BPF_JLE | BPF_X: case BPF_JMP | BPF_JEQ | BPF_X: case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: /* unsigned comparison */ - PPC_CMPLD(dst_reg, src_reg); + if (BPF_CLASS(code) == BPF_JMP32) + PPC_CMPLW(dst_reg, src_reg); + else + PPC_CMPLD(dst_reg, src_reg); break; case BPF_JMP | BPF_JSGT | BPF_X: case BPF_JMP | BPF_JSLT | BPF_X: case BPF_JMP | BPF_JSGE | BPF_X: case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: /* signed comparison */ - PPC_CMPD(dst_reg, src_reg); + if (BPF_CLASS(code) == BPF_JMP32) + PPC_CMPW(dst_reg, src_reg); + else + PPC_CMPD(dst_reg, src_reg); break; case BPF_JMP | BPF_JSET | BPF_X: - PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, src_reg); + case BPF_JMP32 | BPF_JSET | BPF_X: + if (BPF_CLASS(code) == BPF_JMP) { + PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, + src_reg); + } else { + int tmp_reg = b2p[TMP_REG_1]; + + PPC_AND(tmp_reg, dst_reg, src_reg); + PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0, + 31); + } break; case BPF_JMP | BPF_JNE | BPF_K: case BPF_JMP | BPF_JEQ | BPF_K: @@ -828,43 +876,87 @@ cond_branch: case BPF_JMP | BPF_JLT | BPF_K: case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + { + bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32; + /* * Need sign-extended load, so only positive * values can be used as imm in cmpldi */ - if (imm >= 0 && imm < 32768) - PPC_CMPLDI(dst_reg, imm); - else { + if (imm >= 0 && imm < 32768) { + if (is_jmp32) + PPC_CMPLWI(dst_reg, imm); + else + PPC_CMPLDI(dst_reg, imm); + } else { /* sign-extending load */ PPC_LI32(b2p[TMP_REG_1], imm); /* ... but unsigned comparison */ - PPC_CMPLD(dst_reg, b2p[TMP_REG_1]); + if (is_jmp32) + PPC_CMPLW(dst_reg, + b2p[TMP_REG_1]); + else + PPC_CMPLD(dst_reg, + b2p[TMP_REG_1]); } break; + } case BPF_JMP | BPF_JSGT | BPF_K: case BPF_JMP | BPF_JSLT | BPF_K: case BPF_JMP | BPF_JSGE | BPF_K: case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + { + bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32; + /* * signed comparison, so any 16-bit value * can be used in cmpdi */ - if (imm >= -32768 && imm < 32768) - PPC_CMPDI(dst_reg, imm); - else { + if (imm >= -32768 && imm < 32768) { + if (is_jmp32) + PPC_CMPWI(dst_reg, imm); + else + PPC_CMPDI(dst_reg, imm); + } else { PPC_LI32(b2p[TMP_REG_1], imm); - PPC_CMPD(dst_reg, b2p[TMP_REG_1]); + if (is_jmp32) + PPC_CMPW(dst_reg, + b2p[TMP_REG_1]); + else + PPC_CMPD(dst_reg, + b2p[TMP_REG_1]); } break; + } case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: /* andi does not sign-extend the immediate */ if (imm >= 0 && imm < 32768) /* PPC_ANDI is _only/always_ dot-form */ PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm); else { - PPC_LI32(b2p[TMP_REG_1], imm); - PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, - b2p[TMP_REG_1]); + int tmp_reg = b2p[TMP_REG_1]; + + PPC_LI32(tmp_reg, imm); + if (BPF_CLASS(code) == BPF_JMP) { + PPC_AND_DOT(tmp_reg, dst_reg, + tmp_reg); + } else { + PPC_AND(tmp_reg, dst_reg, + tmp_reg); + PPC_RLWINM_DOT(tmp_reg, tmp_reg, + 0, 0, 31); + } } break; } diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 5c36b3a8d47a..3349f3f8fe84 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -70,6 +70,7 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar), PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr), PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar), + PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr), }; u64 perf_reg_value(struct pt_regs *regs, int idx) @@ -83,6 +84,11 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) !is_sier_available())) return 0; + if (idx == PERF_REG_POWERPC_MMCRA && + (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) || + IS_ENABLED(CONFIG_PPC32))) + return 0; + return regs_get_register(regs, pt_regs_offset[idx]); } diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c index a1aaa1569d7c..f0e488d97567 100644 --- a/arch/powerpc/platforms/4xx/ocm.c +++ b/arch/powerpc/platforms/4xx/ocm.c @@ -237,12 +237,12 @@ static int ocm_debugfs_show(struct seq_file *m, void *v) continue; seq_printf(m, "PPC4XX OCM : %d\n", ocm->index); - seq_printf(m, "PhysAddr : %pa[p]\n", &(ocm->phys)); + seq_printf(m, "PhysAddr : %pa\n", &(ocm->phys)); seq_printf(m, "MemTotal : %d Bytes\n", ocm->memtotal); seq_printf(m, "MemTotal(NC) : %d Bytes\n", ocm->nc.memtotal); seq_printf(m, "MemTotal(C) : %d Bytes\n\n", ocm->c.memtotal); - seq_printf(m, "NC.PhysAddr : %pa[p]\n", &(ocm->nc.phys)); + seq_printf(m, "NC.PhysAddr : %pa\n", &(ocm->nc.phys)); seq_printf(m, "NC.VirtAddr : 0x%p\n", ocm->nc.virt); seq_printf(m, "NC.MemTotal : %d Bytes\n", ocm->nc.memtotal); seq_printf(m, "NC.MemFree : %d Bytes\n", ocm->nc.memfree); @@ -252,7 +252,7 @@ static int ocm_debugfs_show(struct seq_file *m, void *v) blk->size, blk->owner); } - seq_printf(m, "\nC.PhysAddr : %pa[p]\n", &(ocm->c.phys)); + seq_printf(m, "\nC.PhysAddr : %pa\n", &(ocm->c.phys)); seq_printf(m, "C.VirtAddr : 0x%p\n", ocm->c.virt); seq_printf(m, "C.MemTotal : %d Bytes\n", ocm->c.memtotal); seq_printf(m, "C.MemFree : %d Bytes\n", ocm->c.memfree); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index e66644e0fb40..9438fa0fc355 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -538,8 +538,7 @@ static void __init chrp_init_IRQ(void) /* see if there is a keyboard in the device tree with a parent of type "adb" */ for_each_node_by_name(kbd, "keyboard") - if (kbd->parent && kbd->parent->type - && strcmp(kbd->parent->type, "adb") == 0) + if (of_node_is_type(kbd->parent, "adb")) break; of_node_put(kbd); if (kbd) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index d7f742ed48ba..3f58c7dbd581 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -564,7 +564,7 @@ struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) } } else { /* Create a group for 1 GPU and attached NPUs for POWER8 */ - pe->npucomp = kzalloc(sizeof(pe->npucomp), GFP_KERNEL); + pe->npucomp = kzalloc(sizeof(*pe->npucomp), GFP_KERNEL); table_group = &pe->npucomp->table_group; table_group->ops = &pnv_npu_peers_ops; iommu_register_group(table_group, hose->global_number, diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 1d6406a051f1..7db3119f8a5b 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2681,7 +2681,8 @@ static void pnv_pci_ioda_setup_iommu_api(void) list_for_each_entry(hose, &hose_list, list_node) { phb = hose->private_data; - if (phb->type == PNV_PHB_NPU_NVLINK) + if (phb->type == PNV_PHB_NPU_NVLINK || + phb->type == PNV_PHB_NPU_OCAPI) continue; list_for_each_entry(pe, &phb->ioda.pe_list, list) { diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 7725825d887d..37a77e57893e 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -264,7 +264,9 @@ void __init pSeries_final_fixup(void) if (!of_device_is_compatible(nvdn->parent, "ibm,power9-npu")) continue; +#ifdef CONFIG_PPC_POWERNV WARN_ON_ONCE(pnv_npu2_init(hose)); +#endif break; } } diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index ccbb53e22024..8d04e6f3f796 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -25,7 +25,7 @@ static inline int init_new_context(struct task_struct *tsk, atomic_set(&mm->context.flush_count, 0); mm->context.gmap_asce = 0; mm->context.flush_mm = 0; - mm->context.compat_mm = 0; + mm->context.compat_mm = test_thread_flag(TIF_31BIT); #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste || test_thread_flag(TIF_PGSTE) || @@ -90,8 +90,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, { int cpu = smp_processor_id(); - if (prev == next) - return; S390_lowcore.user_asce = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); /* Clear previous user-ASCE from CR1 and CR7 */ @@ -103,7 +101,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, __ctl_load(S390_lowcore.vdso_asce, 7, 7); clear_cpu_flag(CIF_ASCE_SECONDARY); } - cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); + if (prev != next) + cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } #define finish_arch_post_lock_switch finish_arch_post_lock_switch diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index af5c2b3f7065..a8c7789b246b 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -63,10 +63,10 @@ static noinline __init void detect_machine_type(void) if (stsi(vmms, 3, 2, 2) || !vmms->count) return; - /* Running under KVM? If not we assume z/VM */ + /* Detect known hypervisors */ if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; - else + else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 72dd23ef771b..7ed90a759135 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1006,6 +1006,8 @@ void __init setup_arch(char **cmdline_p) pr_info("Linux is running under KVM in 64-bit mode\n"); else if (MACHINE_IS_LPAR) pr_info("Linux is running natively in 64-bit mode\n"); + else + pr_info("Linux is running as a guest in 64-bit mode\n"); /* Have one command line that is parsed and saved in /proc/cmdline */ /* boot_command_line has been already set up in early.c */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index f82b3d3c36e2..b198ece2aad6 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -381,8 +381,13 @@ void smp_call_online_cpu(void (*func)(void *), void *data) */ void smp_call_ipl_cpu(void (*func)(void *), void *data) { + struct lowcore *lc = pcpu_devices->lowcore; + + if (pcpu_devices[0].address == stap()) + lc = &S390_lowcore; + pcpu_delegate(&pcpu_devices[0], func, data, - pcpu_devices->lowcore->nodat_stack); + lc->nodat_stack); } int smp_find_processor_id(u16 address) @@ -1166,7 +1171,11 @@ static ssize_t __ref rescan_store(struct device *dev, { int rc; + rc = lock_device_hotplug_sysfs(); + if (rc) + return rc; rc = smp_rescan_cpus(); + unlock_device_hotplug(); return rc ? rc : count; } static DEVICE_ATTR_WO(rescan); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index ebe748a9f472..4ff354887db4 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -224,10 +224,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) vdso_pages = vdso64_pages; #ifdef CONFIG_COMPAT - if (is_compat_task()) { + mm->context.compat_mm = is_compat_task(); + if (mm->context.compat_mm) vdso_pages = vdso32_pages; - mm->context.compat_mm = 1; - } #endif /* * vDSO has a problem and was disabled, just don't "enable" it for diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 3ff758eeb71d..ce9defdff62a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1110,103 +1110,141 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i mask = 0xf000; /* j */ goto branch_oc; case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */ + case BPF_JMP32 | BPF_JSGT | BPF_K: /* ((s32) dst > (s32) imm) */ mask = 0x2000; /* jh */ goto branch_ks; case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */ + case BPF_JMP32 | BPF_JSLT | BPF_K: /* ((s32) dst < (s32) imm) */ mask = 0x4000; /* jl */ goto branch_ks; case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */ + case BPF_JMP32 | BPF_JSGE | BPF_K: /* ((s32) dst >= (s32) imm) */ mask = 0xa000; /* jhe */ goto branch_ks; case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */ + case BPF_JMP32 | BPF_JSLE | BPF_K: /* ((s32) dst <= (s32) imm) */ mask = 0xc000; /* jle */ goto branch_ks; case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */ + case BPF_JMP32 | BPF_JGT | BPF_K: /* ((u32) dst_reg > (u32) imm) */ mask = 0x2000; /* jh */ goto branch_ku; case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */ + case BPF_JMP32 | BPF_JLT | BPF_K: /* ((u32) dst_reg < (u32) imm) */ mask = 0x4000; /* jl */ goto branch_ku; case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */ + case BPF_JMP32 | BPF_JGE | BPF_K: /* ((u32) dst_reg >= (u32) imm) */ mask = 0xa000; /* jhe */ goto branch_ku; case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */ + case BPF_JMP32 | BPF_JLE | BPF_K: /* ((u32) dst_reg <= (u32) imm) */ mask = 0xc000; /* jle */ goto branch_ku; case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */ + case BPF_JMP32 | BPF_JNE | BPF_K: /* ((u32) dst_reg != (u32) imm) */ mask = 0x7000; /* jne */ goto branch_ku; case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */ + case BPF_JMP32 | BPF_JEQ | BPF_K: /* ((u32) dst_reg == (u32) imm) */ mask = 0x8000; /* je */ goto branch_ku; case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */ + case BPF_JMP32 | BPF_JSET | BPF_K: /* ((u32) dst_reg & (u32) imm) */ mask = 0x7000; /* jnz */ - /* lgfi %w1,imm (load sign extend imm) */ - EMIT6_IMM(0xc0010000, REG_W1, imm); - /* ngr %w1,%dst */ - EMIT4(0xb9800000, REG_W1, dst_reg); + if (BPF_CLASS(insn->code) == BPF_JMP32) { + /* llilf %w1,imm (load zero extend imm) */ + EMIT6_IMM(0xc0010000, REG_W1, imm); + /* nr %w1,%dst */ + EMIT2(0x1400, REG_W1, dst_reg); + } else { + /* lgfi %w1,imm (load sign extend imm) */ + EMIT6_IMM(0xc0010000, REG_W1, imm); + /* ngr %w1,%dst */ + EMIT4(0xb9800000, REG_W1, dst_reg); + } goto branch_oc; case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */ + case BPF_JMP32 | BPF_JSGT | BPF_X: /* ((s32) dst > (s32) src) */ mask = 0x2000; /* jh */ goto branch_xs; case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */ + case BPF_JMP32 | BPF_JSLT | BPF_X: /* ((s32) dst < (s32) src) */ mask = 0x4000; /* jl */ goto branch_xs; case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */ + case BPF_JMP32 | BPF_JSGE | BPF_X: /* ((s32) dst >= (s32) src) */ mask = 0xa000; /* jhe */ goto branch_xs; case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */ + case BPF_JMP32 | BPF_JSLE | BPF_X: /* ((s32) dst <= (s32) src) */ mask = 0xc000; /* jle */ goto branch_xs; case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */ + case BPF_JMP32 | BPF_JGT | BPF_X: /* ((u32) dst > (u32) src) */ mask = 0x2000; /* jh */ goto branch_xu; case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */ + case BPF_JMP32 | BPF_JLT | BPF_X: /* ((u32) dst < (u32) src) */ mask = 0x4000; /* jl */ goto branch_xu; case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */ + case BPF_JMP32 | BPF_JGE | BPF_X: /* ((u32) dst >= (u32) src) */ mask = 0xa000; /* jhe */ goto branch_xu; case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */ + case BPF_JMP32 | BPF_JLE | BPF_X: /* ((u32) dst <= (u32) src) */ mask = 0xc000; /* jle */ goto branch_xu; case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */ + case BPF_JMP32 | BPF_JNE | BPF_X: /* ((u32) dst != (u32) src) */ mask = 0x7000; /* jne */ goto branch_xu; case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */ + case BPF_JMP32 | BPF_JEQ | BPF_X: /* ((u32) dst == (u32) src) */ mask = 0x8000; /* je */ goto branch_xu; case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */ + case BPF_JMP32 | BPF_JSET | BPF_X: /* ((u32) dst & (u32) src) */ + { + bool is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; + mask = 0x7000; /* jnz */ - /* ngrk %w1,%dst,%src */ - EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg); + /* nrk or ngrk %w1,%dst,%src */ + EMIT4_RRF((is_jmp32 ? 0xb9f40000 : 0xb9e40000), + REG_W1, dst_reg, src_reg); goto branch_oc; branch_ks: /* lgfi %w1,imm (load sign extend imm) */ EMIT6_IMM(0xc0010000, REG_W1, imm); - /* cgrj %dst,%w1,mask,off */ - EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask); + /* crj or cgrj %dst,%w1,mask,off */ + EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064), + dst_reg, REG_W1, i, off, mask); break; branch_ku: /* lgfi %w1,imm (load sign extend imm) */ EMIT6_IMM(0xc0010000, REG_W1, imm); - /* clgrj %dst,%w1,mask,off */ - EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask); + /* clrj or clgrj %dst,%w1,mask,off */ + EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065), + dst_reg, REG_W1, i, off, mask); break; branch_xs: - /* cgrj %dst,%src,mask,off */ - EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask); + /* crj or cgrj %dst,%src,mask,off */ + EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064), + dst_reg, src_reg, i, off, mask); break; branch_xu: - /* clgrj %dst,%src,mask,off */ - EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask); + /* clrj or clgrj %dst,%src,mask,off */ + EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065), + dst_reg, src_reg, i, off, mask); break; branch_oc: /* brc mask,jmp_off (branch instruction needs 4 bytes) */ jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4); EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off); break; + } default: /* too complex, give up */ pr_err("Unknown opcode %02x\n", insn->code); return -1; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 15af091611e2..26387c7bf305 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -198,7 +198,7 @@ config X86 select IRQ_FORCED_THREADING select NEED_SG_DMA_LENGTH select PCI_DOMAINS if PCI - select PCI_LOCKLESS_CONFIG + select PCI_LOCKLESS_CONFIG if PCI select PERF_EVENTS select RTC_LIB select RTC_MC146818_LIB @@ -617,7 +617,7 @@ config X86_INTEL_QUARK config X86_INTEL_LPSS bool "Intel Low Power Subsystem Support" - depends on X86 && ACPI + depends on X86 && ACPI && PCI select COMMON_CLK select PINCTRL select IOSF_MBI diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 8eaf8952c408..39913770a44d 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -361,7 +361,8 @@ ENTRY(entry_INT80_compat) /* Need to switch before accessing the thread stack. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi - movq %rsp, %rdi + /* In the Xen PV case we already run on the thread stack. */ + ALTERNATIVE "movq %rsp, %rdi", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp pushq 6*8(%rdi) /* regs->ss */ @@ -370,8 +371,9 @@ ENTRY(entry_INT80_compat) pushq 3*8(%rdi) /* regs->cs */ pushq 2*8(%rdi) /* regs->ip */ pushq 1*8(%rdi) /* regs->orig_ax */ - pushq (%rdi) /* pt_regs->di */ +.Lint80_keep_stack: + pushq %rsi /* pt_regs->si */ xorl %esi, %esi /* nospec si */ pushq %rdx /* pt_regs->dx */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 0ca50611e8ce..19d18fae6ec6 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -178,6 +178,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); +/* + * Init a new mm. Used on mm copies, like at fork() + * and on mm's that are brand-new, like at execve(). + */ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { @@ -228,8 +232,22 @@ do { \ } while (0) #endif +static inline void arch_dup_pkeys(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; + + /* Duplicate the oldmm pkey state in mm: */ + mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; + mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; +#endif +} + static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { + arch_dup_pkeys(oldmm, mm); paravirt_arch_dup_mmap(oldmm, mm); return ldt_dup_context(oldmm, mm); } diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a77445d1b034..780f2b42c8ef 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -711,7 +711,7 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t { if (unlikely(!access_ok(ptr,len))) return 0; - __uaccess_begin(); + __uaccess_begin_nospec(); return 1; } #define user_access_begin(a,b) user_access_begin(a,b) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c8b07d8ea5a2..17ffc869cab8 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -470,6 +470,7 @@ int crash_load_segments(struct kimage *image) kbuf.memsz = kbuf.bufsz; kbuf.buf_align = ELF_CORE_HEADER_ALIGN; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) { vfree((void *)image->arch.elf_headers); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index b0acb22e5a46..dfd3aca82c61 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -21,10 +21,6 @@ #define HPET_MASK CLOCKSOURCE_MASK(32) -/* FSEC = 10^-15 - NSEC = 10^-9 */ -#define FSEC_PER_NSEC 1000000L - #define HPET_DEV_USED_BIT 2 #define HPET_DEV_USED (1 << HPET_DEV_USED_BIT) #define HPET_DEV_VALID 0x8 diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 278cd07228dd..0d5efa34f359 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -434,6 +434,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, kbuf.memsz = PAGE_ALIGN(header->init_size); kbuf.buf_align = header->kernel_alignment; kbuf.buf_min = MIN_KERNEL_LOAD_ADDR; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) goto out_free_params; @@ -448,6 +449,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, kbuf.bufsz = kbuf.memsz = initrd_len; kbuf.buf_align = PAGE_SIZE; kbuf.buf_min = MIN_INITRD_LOAD_ADDR; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) goto out_free_params; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index ba4bfb7f6a36..5c93a65ee1e5 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -457,6 +457,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector) #else u64 ipi_bitmap = 0; #endif + long ret; if (cpumask_empty(mask)) return; @@ -482,8 +483,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector) } else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) { max = apic_id < max ? max : apic_id; } else { - kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, + ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr); + WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret); min = max = apic_id; ipi_bitmap = 0; } @@ -491,8 +493,9 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector) } if (ipi_bitmap) { - kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, + ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr); + WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret); } local_irq_restore(flags); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e9f777bfed40..3fae23834069 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -297,15 +297,16 @@ static int __init tsc_setup(char *str) __setup("tsc=", tsc_setup); -#define MAX_RETRIES 5 -#define SMI_TRESHOLD 50000 +#define MAX_RETRIES 5 +#define TSC_DEFAULT_THRESHOLD 0x20000 /* - * Read TSC and the reference counters. Take care of SMI disturbance + * Read TSC and the reference counters. Take care of any disturbances */ static u64 tsc_read_refs(u64 *p, int hpet) { u64 t1, t2; + u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD; int i; for (i = 0; i < MAX_RETRIES; i++) { @@ -315,7 +316,7 @@ static u64 tsc_read_refs(u64 *p, int hpet) else *p = acpi_pm_read_early(); t2 = get_cycles(); - if ((t2 - t1) < SMI_TRESHOLD) + if ((t2 - t1) < thresh) return t2; } return ULLONG_MAX; @@ -703,15 +704,15 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void) * zero. In each wait loop iteration we read the TSC and check * the delta to the previous read. We keep track of the min * and max values of that delta. The delta is mostly defined - * by the IO time of the PIT access, so we can detect when a - * SMI/SMM disturbance happened between the two reads. If the + * by the IO time of the PIT access, so we can detect when + * any disturbance happened between the two reads. If the * maximum time is significantly larger than the minimum time, * then we discard the result and have another try. * * 2) Reference counter. If available we use the HPET or the * PMTIMER as a reference to check the sanity of that value. * We use separate TSC readouts and check inside of the - * reference read for a SMI/SMM disturbance. We dicard + * reference read for any possible disturbance. We dicard * disturbed values here as well. We do that around the PIT * calibration delay loop as we have to wait for a certain * amount of time anyway. @@ -744,7 +745,7 @@ static unsigned long pit_hpet_ptimer_calibrate_cpu(void) if (ref1 == ref2) continue; - /* Check, whether the sampling was disturbed by an SMI */ + /* Check, whether the sampling was disturbed */ if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) continue; @@ -1268,7 +1269,7 @@ static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); */ static void tsc_refine_calibration_work(struct work_struct *work) { - static u64 tsc_start = -1, ref_start; + static u64 tsc_start = ULLONG_MAX, ref_start; static int hpet; u64 tsc_stop, ref_stop, delta; unsigned long freq; @@ -1283,14 +1284,15 @@ static void tsc_refine_calibration_work(struct work_struct *work) * delayed the first time we expire. So set the workqueue * again once we know timers are working. */ - if (tsc_start == -1) { + if (tsc_start == ULLONG_MAX) { +restart: /* * Only set hpet once, to avoid mixing hardware * if the hpet becomes enabled later. */ hpet = is_hpet_enabled(); - schedule_delayed_work(&tsc_irqwork, HZ); tsc_start = tsc_read_refs(&ref_start, hpet); + schedule_delayed_work(&tsc_irqwork, HZ); return; } @@ -1300,9 +1302,9 @@ static void tsc_refine_calibration_work(struct work_struct *work) if (ref_start == ref_stop) goto out; - /* Check, whether the sampling was disturbed by an SMI */ - if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX) - goto out; + /* Check, whether the sampling was disturbed */ + if (tsc_stop == ULLONG_MAX) + goto restart; delta = tsc_stop - tsc_start; delta *= 1000000LL; diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 69b3a7c30013..31ecf7a76d5a 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -2,10 +2,6 @@ ccflags-y += -Iarch/x86/kvm -CFLAGS_x86.o := -I. -CFLAGS_svm.o := -I. -CFLAGS_vmx.o := -I. - KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index c90a5352d158..89d20ed1d2e8 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1636,7 +1636,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) ret = kvm_hvcall_signal_event(vcpu, fast, ingpa); if (ret != HV_STATUS_INVALID_PORT_ID) break; - /* maybe userspace knows this conn_id: fall through */ + /* fall through - maybe userspace knows this conn_id. */ case HVCALL_POST_MESSAGE: /* don't bother userspace if it has no way to handle it */ if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) { @@ -1832,7 +1832,6 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->eax |= HV_X64_MSR_VP_INDEX_AVAILABLE; ent->eax |= HV_X64_MSR_RESET_AVAILABLE; ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE; - ent->eax |= HV_X64_MSR_GUEST_IDLE_AVAILABLE; ent->eax |= HV_X64_ACCESS_FREQUENCY_MSRS; ent->eax |= HV_X64_ACCESS_REENLIGHTENMENT; @@ -1848,11 +1847,11 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, case HYPERV_CPUID_ENLIGHTMENT_INFO: ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; - ent->eax |= HV_X64_SYSTEM_RESET_RECOMMENDED; ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; - ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; + if (evmcs_ver) + ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; /* * Default number of spinlock retry attempts, matches diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9f089e2e09d0..4b6c2da7265c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1035,6 +1035,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, switch (delivery_mode) { case APIC_DM_LOWEST: vcpu->arch.apic_arb_prio++; + /* fall through */ case APIC_DM_FIXED: if (unlikely(trig_mode && !level)) break; @@ -1874,6 +1875,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVT0: apic_manage_nmi_watchdog(apic, val); + /* fall through */ case APIC_LVTTHMR: case APIC_LVTPC: case APIC_LVT1: diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ce770b446238..da9c42349b1f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4371,6 +4371,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, rsvd_bits(maxphyaddr, 51); rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; + /* fall through */ case PT64_ROOT_4LEVEL: rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | nonleaf_bit8_rsvd | rsvd_bits(7, 7) | diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a157ca5b6869..f13a3a24d360 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3414,6 +3414,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) kvm_mmu_reset_context(&svm->vcpu); kvm_mmu_load(&svm->vcpu); + /* + * Drop what we picked up for L2 via svm_complete_interrupts() so it + * doesn't end up in L1. + */ + svm->vcpu.arch.nmi_injected = false; + kvm_clear_exception_queue(&svm->vcpu); + kvm_clear_interrupt_queue(&svm->vcpu); + return 0; } @@ -4395,7 +4403,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_IA32_APICBASE: if (kvm_vcpu_apicv_active(vcpu)) avic_update_vapic_bar(to_svm(vcpu), data); - /* Follow through */ + /* Fall through */ default: return kvm_set_msr_common(vcpu, msr); } @@ -4504,28 +4512,19 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) kvm_lapic_reg_write(apic, APIC_ICR, icrl); break; case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: { - int i; - struct kvm_vcpu *vcpu; - struct kvm *kvm = svm->vcpu.kvm; struct kvm_lapic *apic = svm->vcpu.arch.apic; /* - * At this point, we expect that the AVIC HW has already - * set the appropriate IRR bits on the valid target - * vcpus. So, we just need to kick the appropriate vcpu. + * Update ICR high and low, then emulate sending IPI, + * which is handled when writing APIC_ICR. */ - kvm_for_each_vcpu(i, vcpu, kvm) { - bool m = kvm_apic_match_dest(vcpu, apic, - icrl & KVM_APIC_SHORT_MASK, - GET_APIC_DEST_FIELD(icrh), - icrl & KVM_APIC_DEST_MASK); - - if (m && !avic_vcpu_is_running(vcpu)) - kvm_vcpu_wake_up(vcpu); - } + kvm_lapic_reg_write(apic, APIC_ICR2, icrh); + kvm_lapic_reg_write(apic, APIC_ICR, icrl); break; } case AVIC_IPI_FAILURE_INVALID_TARGET: + WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n", + index, svm->vcpu.vcpu_id, icrh, icrl); break; case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: WARN_ONCE(1, "Invalid backing page\n"); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 705f40ae2532..6432d08c7de7 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1465,7 +1465,7 @@ TRACE_EVENT(kvm_hv_send_ipi_ex, #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH arch/x86/kvm +#define TRACE_INCLUDE_PATH ../../arch/x86/kvm #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index 95bc2247478d..5466c6d85cf3 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -332,16 +332,17 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version) { struct vcpu_vmx *vmx = to_vmx(vcpu); + bool evmcs_already_enabled = vmx->nested.enlightened_vmcs_enabled; + + vmx->nested.enlightened_vmcs_enabled = true; if (vmcs_version) *vmcs_version = nested_get_evmcs_version(vcpu); /* We don't support disabling the feature for simplicity. */ - if (vmx->nested.enlightened_vmcs_enabled) + if (evmcs_already_enabled) return 0; - vmx->nested.enlightened_vmcs_enabled = true; - vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL; vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2616bd2c7f2c..8ff20523661b 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -55,7 +55,7 @@ static u16 shadow_read_write_fields[] = { static int max_shadow_read_write_fields = ARRAY_SIZE(shadow_read_write_fields); -void init_vmcs_shadow_fields(void) +static void init_vmcs_shadow_fields(void) { int i, j; @@ -4140,11 +4140,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) if (r < 0) goto out_vmcs02; - vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); + vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) goto out_cached_vmcs12; - vmx->nested.cached_shadow_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); + vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_shadow_vmcs12) goto out_cached_shadow_vmcs12; @@ -5263,13 +5263,17 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, copy_shadow_to_vmcs12(vmx); } - if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12))) + /* + * Copy over the full allocated size of vmcs12 rather than just the size + * of the struct. + */ + if (copy_to_user(user_kvm_nested_state->data, vmcs12, VMCS12_SIZE)) return -EFAULT; if (nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) { if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE, - get_shadow_vmcs12(vcpu), sizeof(*vmcs12))) + get_shadow_vmcs12(vcpu), VMCS12_SIZE)) return -EFAULT; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f6915f10e584..4341175339f3 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -423,7 +423,7 @@ static void check_ept_pointer_match(struct kvm *kvm) to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH; } -int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush, +static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush, void *data) { struct kvm_tlb_range *range = data; @@ -1773,7 +1773,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) return 1; - /* Otherwise falls through */ + /* Else, falls through */ default: msr = find_msr_entry(vmx, msr_info->index); if (msr) { @@ -2014,7 +2014,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Check reserved bit, higher 32 bits should be zero */ if ((data >> 32) != 0) return 1; - /* Otherwise falls through */ + /* Else, falls through */ default: msr = find_msr_entry(vmx, msr_index); if (msr) { @@ -2344,7 +2344,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, case 37: /* AAT100 */ case 44: /* BC86,AAY89,BD102 */ case 46: /* BA97 */ - _vmexit_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL " "does not work properly. Using workaround\n"); @@ -6362,72 +6362,9 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->hv_timer_armed = false; } -static void vmx_vcpu_run(struct kvm_vcpu *vcpu) +static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr3, cr4, evmcs_rsp; - - /* Record the guest's net vcpu time for enforced NMI injections. */ - if (unlikely(!enable_vnmi && - vmx->loaded_vmcs->soft_vnmi_blocked)) - vmx->loaded_vmcs->entry_time = ktime_get(); - - /* Don't enter VMX if guest state is invalid, let the exit handler - start emulation until we arrive back to a valid state */ - if (vmx->emulation_required) - return; - - if (vmx->ple_window_dirty) { - vmx->ple_window_dirty = false; - vmcs_write32(PLE_WINDOW, vmx->ple_window); - } - - if (vmx->nested.need_vmcs12_sync) - nested_sync_from_vmcs12(vcpu); - - if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) - vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); - if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) - vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); - - cr3 = __get_current_cr3_fast(); - if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { - vmcs_writel(HOST_CR3, cr3); - vmx->loaded_vmcs->host_state.cr3 = cr3; - } - - cr4 = cr4_read_shadow(); - if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { - vmcs_writel(HOST_CR4, cr4); - vmx->loaded_vmcs->host_state.cr4 = cr4; - } - - /* When single-stepping over STI and MOV SS, we must clear the - * corresponding interruptibility bits in the guest state. Otherwise - * vmentry fails as it then expects bit 14 (BS) in pending debug - * exceptions being set, but that's not correct for the guest debugging - * case. */ - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vmx_set_interrupt_shadow(vcpu, 0); - - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && - vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vcpu->arch.pkru); - - pt_guest_enter(vmx); - - atomic_switch_perf_msrs(vmx); - - vmx_update_hv_timer(vcpu); - - /* - * If this vCPU has touched SPEC_CTRL, restore the guest's value if - * it's non-zero. Since vmentry is serialising on affected CPUs, there - * is no need to worry about the conditional branch over the wrmsr - * being speculatively taken. - */ - x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); + unsigned long evmcs_rsp; vmx->__launched = vmx->loaded_vmcs->launched; @@ -6567,6 +6504,77 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) , "eax", "ebx", "edi" #endif ); +} +STACK_FRAME_NON_STANDARD(__vmx_vcpu_run); + +static void vmx_vcpu_run(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long cr3, cr4; + + /* Record the guest's net vcpu time for enforced NMI injections. */ + if (unlikely(!enable_vnmi && + vmx->loaded_vmcs->soft_vnmi_blocked)) + vmx->loaded_vmcs->entry_time = ktime_get(); + + /* Don't enter VMX if guest state is invalid, let the exit handler + start emulation until we arrive back to a valid state */ + if (vmx->emulation_required) + return; + + if (vmx->ple_window_dirty) { + vmx->ple_window_dirty = false; + vmcs_write32(PLE_WINDOW, vmx->ple_window); + } + + if (vmx->nested.need_vmcs12_sync) + nested_sync_from_vmcs12(vcpu); + + if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) + vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); + if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) + vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); + + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + + cr4 = cr4_read_shadow(); + if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { + vmcs_writel(HOST_CR4, cr4); + vmx->loaded_vmcs->host_state.cr4 = cr4; + } + + /* When single-stepping over STI and MOV SS, we must clear the + * corresponding interruptibility bits in the guest state. Otherwise + * vmentry fails as it then expects bit 14 (BS) in pending debug + * exceptions being set, but that's not correct for the guest debugging + * case. */ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vmx_set_interrupt_shadow(vcpu, 0); + + if (static_cpu_has(X86_FEATURE_PKU) && + kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && + vcpu->arch.pkru != vmx->host_pkru) + __write_pkru(vcpu->arch.pkru); + + pt_guest_enter(vmx); + + atomic_switch_perf_msrs(vmx); + + vmx_update_hv_timer(vcpu); + + /* + * If this vCPU has touched SPEC_CTRL, restore the guest's value if + * it's non-zero. Since vmentry is serialising on affected CPUs, there + * is no need to worry about the conditional branch over the wrmsr + * being speculatively taken. + */ + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); + + __vmx_vcpu_run(vcpu, vmx); /* * We do not use IBRS in the kernel. If this vCPU has used the @@ -6648,7 +6656,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); } -STACK_FRAME_NON_STANDARD(vmx_vcpu_run); static struct kvm *vmx_vm_alloc(void) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02c8e095a239..3d27206f6c01 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3834,6 +3834,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_HYPERV_SYNIC2: if (cap->args[0]) return -EINVAL; + /* fall through */ + case KVM_CAP_HYPERV_SYNIC: if (!irqchip_in_kernel(vcpu->kvm)) return -EINVAL; @@ -6480,8 +6482,7 @@ restart: toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE && - (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) + if (r == EMULATE_DONE && ctxt->tf) kvm_vcpu_do_singlestep(vcpu, &r); if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) @@ -7093,10 +7094,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) case KVM_HC_CLOCK_PAIRING: ret = kvm_pv_clock_pairing(vcpu, a0, a1); break; +#endif case KVM_HC_SEND_IPI: ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit); break; -#endif default: ret = -KVM_ENOSYS; break; @@ -7937,6 +7938,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) vcpu->arch.pv.pv_unhalted = false; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + /* fall through */ case KVM_MP_STATE_RUNNABLE: vcpu->arch.apf.halted = false; break; diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c index 79778ab200e4..a53665116458 100644 --- a/arch/x86/lib/kaslr.c +++ b/arch/x86/lib/kaslr.c @@ -36,8 +36,8 @@ static inline u16 i8254(void) u16 status, timer; do { - outb(I8254_PORT_CONTROL, - I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + outb(I8254_CMD_READBACK | I8254_SELECT_COUNTER0, + I8254_PORT_CONTROL); status = inb(I8254_PORT_COUNTER0); timer = inb(I8254_PORT_COUNTER0); timer |= inb(I8254_PORT_COUNTER0) << 8; diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index a19ef1a416ff..4aa9b1480866 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -158,8 +158,8 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) pmd = pmd_offset(pud, ppd->vaddr); if (pmd_none(*pmd)) { pte = ppd->pgtable_area; - memset(pte, 0, sizeof(pte) * PTRS_PER_PTE); - ppd->pgtable_area += sizeof(pte) * PTRS_PER_PTE; + memset(pte, 0, sizeof(*pte) * PTRS_PER_PTE); + ppd->pgtable_area += sizeof(*pte) * PTRS_PER_PTE; set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte))); } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5542303c43d9..afabf597c855 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -881,20 +881,41 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP | BPF_JSLT | BPF_X: case BPF_JMP | BPF_JSGE | BPF_X: case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: /* cmp dst_reg, src_reg */ - EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39, - add_2reg(0xC0, dst_reg, src_reg)); + if (BPF_CLASS(insn->code) == BPF_JMP) + EMIT1(add_2mod(0x48, dst_reg, src_reg)); + else if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, dst_reg, src_reg)); + EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg)); goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: /* test dst_reg, src_reg */ - EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85, - add_2reg(0xC0, dst_reg, src_reg)); + if (BPF_CLASS(insn->code) == BPF_JMP) + EMIT1(add_2mod(0x48, dst_reg, src_reg)); + else if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, dst_reg, src_reg)); + EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg)); goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: /* test dst_reg, imm32 */ - EMIT1(add_1mod(0x48, dst_reg)); + if (BPF_CLASS(insn->code) == BPF_JMP) + EMIT1(add_1mod(0x48, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); goto emit_cond_jmp; @@ -908,8 +929,21 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP | BPF_JSLT | BPF_K: case BPF_JMP | BPF_JSGE | BPF_K: case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: /* cmp dst_reg, imm8/32 */ - EMIT1(add_1mod(0x48, dst_reg)); + if (BPF_CLASS(insn->code) == BPF_JMP) + EMIT1(add_1mod(0x48, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); if (is_imm8(imm32)) EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 8f6cc71e0848..0d9cdffce6ac 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -2072,7 +2072,18 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_JMP | BPF_JSGT | BPF_X: case BPF_JMP | BPF_JSLE | BPF_X: case BPF_JMP | BPF_JSLT | BPF_X: - case BPF_JMP | BPF_JSGE | BPF_X: { + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: { + bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? IA32_EDX : dst_hi; u8 sreg_lo = sstk ? IA32_ECX : src_lo; @@ -2081,25 +2092,35 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, if (dstk) { EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst_lo)); - EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), - STACK_VAR(dst_hi)); + if (is_jmp64) + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EDX), + STACK_VAR(dst_hi)); } if (sstk) { EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo)); - EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), - STACK_VAR(src_hi)); + if (is_jmp64) + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EBX), + STACK_VAR(src_hi)); } - /* cmp dreg_hi,sreg_hi */ - EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); - EMIT2(IA32_JNE, 2); + if (is_jmp64) { + /* cmp dreg_hi,sreg_hi */ + EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); + EMIT2(IA32_JNE, 2); + } /* cmp dreg_lo,sreg_lo */ EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); goto emit_cond_jmp; } - case BPF_JMP | BPF_JSET | BPF_X: { + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: { + bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? IA32_EDX : dst_hi; u8 sreg_lo = sstk ? IA32_ECX : src_lo; @@ -2108,15 +2129,21 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, if (dstk) { EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst_lo)); - EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), - STACK_VAR(dst_hi)); + if (is_jmp64) + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EDX), + STACK_VAR(dst_hi)); } if (sstk) { EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo)); - EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), - STACK_VAR(src_hi)); + if (is_jmp64) + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EBX), + STACK_VAR(src_hi)); } /* and dreg_lo,sreg_lo */ EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); @@ -2126,32 +2153,39 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); goto emit_cond_jmp; } - case BPF_JMP | BPF_JSET | BPF_K: { - u32 hi; + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: { + bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? IA32_EDX : dst_hi; u8 sreg_lo = IA32_ECX; u8 sreg_hi = IA32_EBX; + u32 hi; if (dstk) { EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst_lo)); - EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), - STACK_VAR(dst_hi)); + if (is_jmp64) + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EDX), + STACK_VAR(dst_hi)); } - hi = imm32 & (1<<31) ? (u32)~0 : 0; /* mov ecx,imm32 */ - EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); - /* mov ebx,imm32 */ - EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); + EMIT2_off32(0xC7, add_1reg(0xC0, sreg_lo), imm32); /* and dreg_lo,sreg_lo */ EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); - /* and dreg_hi,sreg_hi */ - EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); - /* or dreg_lo,dreg_hi */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); + if (is_jmp64) { + hi = imm32 & (1 << 31) ? (u32)~0 : 0; + /* mov ebx,imm32 */ + EMIT2_off32(0xC7, add_1reg(0xC0, sreg_hi), hi); + /* and dreg_hi,sreg_hi */ + EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); + /* or dreg_lo,dreg_hi */ + EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); + } goto emit_cond_jmp; } case BPF_JMP | BPF_JEQ | BPF_K: @@ -2163,29 +2197,44 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_JMP | BPF_JSGT | BPF_K: case BPF_JMP | BPF_JSLE | BPF_K: case BPF_JMP | BPF_JSLT | BPF_K: - case BPF_JMP | BPF_JSGE | BPF_K: { - u32 hi; + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: { + bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? IA32_EDX : dst_hi; u8 sreg_lo = IA32_ECX; u8 sreg_hi = IA32_EBX; + u32 hi; if (dstk) { EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst_lo)); - EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), - STACK_VAR(dst_hi)); + if (is_jmp64) + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EDX), + STACK_VAR(dst_hi)); } - hi = imm32 & (1<<31) ? (u32)~0 : 0; /* mov ecx,imm32 */ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); - /* mov ebx,imm32 */ - EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); - - /* cmp dreg_hi,sreg_hi */ - EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); - EMIT2(IA32_JNE, 2); + if (is_jmp64) { + hi = imm32 & (1 << 31) ? (u32)~0 : 0; + /* mov ebx,imm32 */ + EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); + /* cmp dreg_hi,sreg_hi */ + EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); + EMIT2(IA32_JNE, 2); + } /* cmp dreg_lo,sreg_lo */ EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 2f6787fc7106..c54a493e139a 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -898,10 +898,7 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) val = native_read_msr_safe(msr, err); switch (msr) { case MSR_IA32_APICBASE: -#ifdef CONFIG_X86_X2APIC - if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_X2APIC & 31)))) -#endif - val &= ~X2APIC_ENABLE; + val &= ~X2APIC_ENABLE; break; } return val; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 72bf446c3fee..6e29794573b7 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -361,8 +361,6 @@ void xen_timer_resume(void) { int cpu; - pvclock_resume(); - if (xen_clockevent != &xen_vcpuop_clockevent) return; @@ -379,12 +377,15 @@ static const struct pv_time_ops xen_time_ops __initconst = { }; static struct pvclock_vsyscall_time_info *xen_clock __read_mostly; +static u64 xen_clock_value_saved; void xen_save_time_memory_area(void) { struct vcpu_register_time_memory_area t; int ret; + xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset; + if (!xen_clock) return; @@ -404,7 +405,7 @@ void xen_restore_time_memory_area(void) int ret; if (!xen_clock) - return; + goto out; t.addr.v = &xen_clock->pvti; @@ -421,6 +422,11 @@ void xen_restore_time_memory_area(void) if (ret != 0) pr_notice("Cannot restore secondary vcpu_time_info (err %d)", ret); + +out: + /* Need pvclock_resume() before using xen_clocksource_read(). */ + pvclock_resume(); + xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved; } static void xen_setup_vsyscall_time_info(void) diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 63e0f12be7c9..72adbbe975d5 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1154,15 +1154,14 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity, } /** - * __bfq_deactivate_entity - deactivate an entity from its service tree. - * @entity: the entity to deactivate. + * __bfq_deactivate_entity - update sched_data and service trees for + * entity, so as to represent entity as inactive + * @entity: the entity being deactivated. * @ins_into_idle_tree: if false, the entity will not be put into the * idle tree. * - * Deactivates an entity, independently of its previous state. Must - * be invoked only if entity is on a service tree. Extracts the entity - * from that tree, and if necessary and allowed, puts it into the idle - * tree. + * If necessary and allowed, puts entity into the idle tree. NOTE: + * entity may be on no tree if in service. */ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) { diff --git a/block/blk-mq-debugfs-zoned.c b/block/blk-mq-debugfs-zoned.c index fb2c82c351e4..038cb627c868 100644 --- a/block/blk-mq-debugfs-zoned.c +++ b/block/blk-mq-debugfs-zoned.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2017 Western Digital Corporation or its affiliates. - * - * This file is released under the GPL. */ #include diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 90d68760af08..f8120832ca7b 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -308,8 +308,9 @@ static const char *const cmd_flag_name[] = { CMD_FLAG_NAME(PREFLUSH), CMD_FLAG_NAME(RAHEAD), CMD_FLAG_NAME(BACKGROUND), - CMD_FLAG_NAME(NOUNMAP), CMD_FLAG_NAME(NOWAIT), + CMD_FLAG_NAME(NOUNMAP), + CMD_FLAG_NAME(HIPRI), }; #undef CMD_FLAG_NAME diff --git a/block/blk-mq.c b/block/blk-mq.c index 3ba37b9e15e9..8f5b533764ca 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1906,7 +1906,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { const int is_sync = op_is_sync(bio->bi_opf); const int is_flush_fua = op_is_flush(bio->bi_opf); - struct blk_mq_alloc_data data = { .flags = 0, .cmd_flags = bio->bi_opf }; + struct blk_mq_alloc_data data = { .flags = 0}; struct request *rq; struct blk_plug *plug; struct request *same_queue_rq = NULL; @@ -1928,6 +1928,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) rq_qos_throttle(q, bio); + data.cmd_flags = bio->bi_opf; rq = blk_mq_get_request(q, bio, &data); if (unlikely(!rq)) { rq_qos_cleanup(q, bio); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index f0c56649775f..fd166fbb0f65 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -597,7 +597,7 @@ static void wbt_track(struct rq_qos *rqos, struct request *rq, struct bio *bio) rq->wbt_flags |= bio_to_wbt_flags(rwb, bio); } -void wbt_issue(struct rq_qos *rqos, struct request *rq) +static void wbt_issue(struct rq_qos *rqos, struct request *rq) { struct rq_wb *rwb = RQWB(rqos); @@ -617,7 +617,7 @@ void wbt_issue(struct rq_qos *rqos, struct request *rq) } } -void wbt_requeue(struct rq_qos *rqos, struct request *rq) +static void wbt_requeue(struct rq_qos *rqos, struct request *rq) { struct rq_wb *rwb = RQWB(rqos); if (!rwb_enabled(rwb)) diff --git a/crypto/adiantum.c b/crypto/adiantum.c index 6651e713c45d..5564e73266a6 100644 --- a/crypto/adiantum.c +++ b/crypto/adiantum.c @@ -539,6 +539,8 @@ static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb) ictx = skcipher_instance_ctx(inst); /* Stream cipher, e.g. "xchacha12" */ + crypto_set_skcipher_spawn(&ictx->streamcipher_spawn, + skcipher_crypto_instance(inst)); err = crypto_grab_skcipher(&ictx->streamcipher_spawn, streamcipher_name, 0, crypto_requires_sync(algt->type, algt->mask)); @@ -547,6 +549,8 @@ static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb) streamcipher_alg = crypto_spawn_skcipher_alg(&ictx->streamcipher_spawn); /* Block cipher, e.g. "aes" */ + crypto_set_spawn(&ictx->blockcipher_spawn, + skcipher_crypto_instance(inst)); err = crypto_grab_spawn(&ictx->blockcipher_spawn, blockcipher_name, CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK); if (err) diff --git a/crypto/authenc.c b/crypto/authenc.c index 37f54d1b2f66..4be293a4b5f0 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -58,14 +58,22 @@ int crypto_authenc_extractkeys(struct crypto_authenc_keys *keys, const u8 *key, return -EINVAL; if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) return -EINVAL; - if (RTA_PAYLOAD(rta) < sizeof(*param)) + + /* + * RTA_OK() didn't align the rtattr's payload when validating that it + * fits in the buffer. Yet, the keys should start on the next 4-byte + * aligned boundary. To avoid confusion, require that the rtattr + * payload be exactly the param struct, which has a 4-byte aligned size. + */ + if (RTA_PAYLOAD(rta) != sizeof(*param)) return -EINVAL; + BUILD_BUG_ON(sizeof(*param) % RTA_ALIGNTO); param = RTA_DATA(rta); keys->enckeylen = be32_to_cpu(param->enckeylen); - key += RTA_ALIGN(rta->rta_len); - keylen -= RTA_ALIGN(rta->rta_len); + key += rta->rta_len; + keylen -= rta->rta_len; if (keylen < keys->enckeylen) return -EINVAL; diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 80a25cc04aec..4741fe89ba2c 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -279,7 +279,7 @@ static void authenc_esn_verify_ahash_done(struct crypto_async_request *areq, struct aead_request *req = areq->data; err = err ?: crypto_authenc_esn_decrypt_tail(req, 0); - aead_request_complete(req, err); + authenc_esn_request_complete(req, err); } static int crypto_authenc_esn_decrypt(struct aead_request *req) diff --git a/crypto/sm3_generic.c b/crypto/sm3_generic.c index 9a5c60f08aad..c0cf87ae7ef6 100644 --- a/crypto/sm3_generic.c +++ b/crypto/sm3_generic.c @@ -100,7 +100,7 @@ static void sm3_compress(u32 *w, u32 *wt, u32 *m) for (i = 0; i <= 63; i++) { - ss1 = rol32((rol32(a, 12) + e + rol32(t(i), i)), 7); + ss1 = rol32((rol32(a, 12) + e + rol32(t(i), i & 31)), 7); ss2 = ss1 ^ rol32(a, 12); diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 7c6afc111d76..bb857421c2e8 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -41,7 +41,8 @@ acpi-y += ec.o acpi-$(CONFIG_ACPI_DOCK) += dock.o acpi-$(CONFIG_PCI) += pci_root.o pci_link.o pci_irq.o obj-$(CONFIG_ACPI_MCFG) += pci_mcfg.o -acpi-y += acpi_lpss.o acpi_apd.o +acpi-$(CONFIG_PCI) += acpi_lpss.o +acpi-y += acpi_apd.o acpi-y += acpi_platform.o acpi-y += acpi_pnp.o acpi-$(CONFIG_ARM_AMBA) += acpi_amba.o diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 99d820a693a8..5c093ce01bcd 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1054,18 +1054,6 @@ void __init acpi_early_init(void) goto error0; } - /* - * ACPI 2.0 requires the EC driver to be loaded and work before - * the EC device is found in the namespace (i.e. before - * acpi_load_tables() is called). - * - * This is accomplished by looking for the ECDT table, and getting - * the EC parameters out of that. - * - * Ignore the result. Not having an ECDT is not fatal. - */ - status = acpi_ec_ecdt_probe(); - #ifdef CONFIG_X86 if (!acpi_ioapic) { /* compatible (0) means level (3) */ @@ -1142,6 +1130,18 @@ static int __init acpi_bus_init(void) goto error1; } + /* + * ACPI 2.0 requires the EC driver to be loaded and work before the EC + * device is found in the namespace. + * + * This is accomplished by looking for the ECDT table and getting the EC + * parameters out of that. + * + * Do that before calling acpi_initialize_objects() which may trigger EC + * address space accesses. + */ + acpi_ec_ecdt_probe(); + status = acpi_enable_subsystem(ACPI_NO_ACPI_ENABLE); if (ACPI_FAILURE(status)) { printk(KERN_ERR PREFIX diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 7e6952edb5b0..6a9e1fb8913a 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -81,7 +81,11 @@ void acpi_debugfs_init(void); #else static inline void acpi_debugfs_init(void) { return; } #endif +#ifdef CONFIG_PCI void acpi_lpss_init(void); +#else +static inline void acpi_lpss_init(void) {} +#endif void acpi_apd_init(void); diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 011d3db19c80..e18ade5d74e9 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -26,7 +26,6 @@ #include #include "intel.h" #include "nfit.h" -#include "intel.h" /* * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is @@ -78,12 +77,6 @@ const guid_t *to_nfit_uuid(enum nfit_uuids id) } EXPORT_SYMBOL(to_nfit_uuid); -static struct acpi_nfit_desc *to_acpi_nfit_desc( - struct nvdimm_bus_descriptor *nd_desc) -{ - return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); -} - static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) { struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; @@ -416,10 +409,36 @@ static bool payload_dumpable(struct nvdimm *nvdimm, unsigned int func) return true; } +static int cmd_to_func(struct nfit_mem *nfit_mem, unsigned int cmd, + struct nd_cmd_pkg *call_pkg) +{ + if (call_pkg) { + int i; + + if (nfit_mem->family != call_pkg->nd_family) + return -ENOTTY; + + for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++) + if (call_pkg->nd_reserved2[i]) + return -EINVAL; + return call_pkg->nd_command; + } + + /* Linux ND commands == NVDIMM_FAMILY_INTEL function numbers */ + if (nfit_mem->family == NVDIMM_FAMILY_INTEL) + return cmd; + + /* + * Force function number validation to fail since 0 is never + * published as a valid function in dsm_mask. + */ + return 0; +} + int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { - struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); union acpi_object in_obj, in_buf, *out_obj; const struct nd_cmd_desc *desc = NULL; @@ -429,30 +448,23 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned long cmd_mask, dsm_mask; u32 offset, fw_status = 0; acpi_handle handle; - unsigned int func; const guid_t *guid; - int rc, i; + int func, rc, i; if (cmd_rc) *cmd_rc = -EINVAL; - func = cmd; - if (cmd == ND_CMD_CALL) { - call_pkg = buf; - func = call_pkg->nd_command; - - for (i = 0; i < ARRAY_SIZE(call_pkg->nd_reserved2); i++) - if (call_pkg->nd_reserved2[i]) - return -EINVAL; - } if (nvdimm) { struct acpi_device *adev = nfit_mem->adev; if (!adev) return -ENOTTY; - if (call_pkg && nfit_mem->family != call_pkg->nd_family) - return -ENOTTY; + if (cmd == ND_CMD_CALL) + call_pkg = buf; + func = cmd_to_func(nfit_mem, cmd, call_pkg); + if (func < 0) + return func; dimm_name = nvdimm_name(nvdimm); cmd_name = nvdimm_cmd_name(cmd); cmd_mask = nvdimm_cmd_mask(nvdimm); @@ -463,6 +475,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, } else { struct acpi_device *adev = to_acpi_dev(acpi_desc); + func = cmd; cmd_name = nvdimm_bus_cmd_name(cmd); cmd_mask = nd_desc->cmd_mask; dsm_mask = cmd_mask; @@ -477,7 +490,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, if (!desc || (cmd && (desc->out_num + desc->in_num == 0))) return -ENOTTY; - if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask)) + /* + * Check for a valid command. For ND_CMD_CALL, we also have to + * make sure that the DSM function is supported. + */ + if (cmd == ND_CMD_CALL && !test_bit(func, &dsm_mask)) + return -ENOTTY; + else if (!test_bit(cmd, &cmd_mask)) return -ENOTTY; in_obj.type = ACPI_TYPE_PACKAGE; @@ -721,6 +740,7 @@ int nfit_get_smbios_id(u32 device_handle, u16 *flags) struct acpi_nfit_memory_map *memdev; struct acpi_nfit_desc *acpi_desc; struct nfit_mem *nfit_mem; + u16 physical_id; mutex_lock(&acpi_desc_lock); list_for_each_entry(acpi_desc, &acpi_descs, list) { @@ -728,10 +748,11 @@ int nfit_get_smbios_id(u32 device_handle, u16 *flags) list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { memdev = __to_nfit_memdev(nfit_mem); if (memdev->device_handle == device_handle) { + *flags = memdev->flags; + physical_id = memdev->physical_id; mutex_unlock(&acpi_desc->init_mutex); mutex_unlock(&acpi_desc_lock); - *flags = memdev->flags; - return memdev->physical_id; + return physical_id; } } mutex_unlock(&acpi_desc->init_mutex); @@ -1872,6 +1893,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, return 0; } + /* + * Function 0 is the command interrogation function, don't + * export it to potential userspace use, and enable it to be + * used as an error value in acpi_nfit_ctl(). + */ + dsm_mask &= ~1UL; + guid = to_nfit_uuid(nfit_mem->family); for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) if (acpi_check_dsm(adev_dimm->handle, guid, @@ -2047,11 +2075,6 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) if (!nvdimm) continue; - rc = nvdimm_security_setup_events(nvdimm); - if (rc < 0) - dev_warn(acpi_desc->dev, - "security event setup failed: %d\n", rc); - nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit"); if (nfit_kernfs) nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs, @@ -2231,7 +2254,6 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); if (!nd_set) return -ENOMEM; - ndr_desc->nd_set = nd_set; guid_copy(&nd_set->type_guid, (guid_t *) spa->range_guid); info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL); @@ -3367,7 +3389,7 @@ EXPORT_SYMBOL_GPL(acpi_nfit_init); static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) { - struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct device *dev = acpi_desc->dev; /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ @@ -3384,7 +3406,7 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) static int __acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd) { - struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); if (nvdimm) return 0; diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c index 850b2927b4e7..f70de71f79d6 100644 --- a/drivers/acpi/nfit/intel.c +++ b/drivers/acpi/nfit/intel.c @@ -146,7 +146,7 @@ static int intel_security_change_key(struct nvdimm *nvdimm, static void nvdimm_invalidate_cache(void); -static int intel_security_unlock(struct nvdimm *nvdimm, +static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm, const struct nvdimm_key_data *key_data) { struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); @@ -227,7 +227,7 @@ static int intel_security_disable(struct nvdimm *nvdimm, return 0; } -static int intel_security_erase(struct nvdimm *nvdimm, +static int __maybe_unused intel_security_erase(struct nvdimm *nvdimm, const struct nvdimm_key_data *key, enum nvdimm_passphrase_type ptype) { @@ -276,7 +276,7 @@ static int intel_security_erase(struct nvdimm *nvdimm, return 0; } -static int intel_security_query_overwrite(struct nvdimm *nvdimm) +static int __maybe_unused intel_security_query_overwrite(struct nvdimm *nvdimm) { int rc; struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); @@ -313,7 +313,7 @@ static int intel_security_query_overwrite(struct nvdimm *nvdimm) return 0; } -static int intel_security_overwrite(struct nvdimm *nvdimm, +static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm, const struct nvdimm_key_data *nkey) { int rc; diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 7496b10532aa..6a2185eb66c5 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +32,7 @@ #include #include #include -#include +#include #include "binder_internal.h" @@ -39,13 +41,31 @@ #define INODE_OFFSET 3 #define INTSTRLEN 21 #define BINDERFS_MAX_MINOR (1U << MINORBITS) - -static struct vfsmount *binderfs_mnt; +/* Ensure that the initial ipc namespace always has devices available. */ +#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4) static dev_t binderfs_dev; static DEFINE_MUTEX(binderfs_minors_mutex); static DEFINE_IDA(binderfs_minors); +/** + * binderfs_mount_opts - mount options for binderfs + * @max: maximum number of allocatable binderfs binder devices + */ +struct binderfs_mount_opts { + int max; +}; + +enum { + Opt_max, + Opt_err +}; + +static const match_table_t tokens = { + { Opt_max, "max=%d" }, + { Opt_err, NULL } +}; + /** * binderfs_info - information about a binderfs mount * @ipc_ns: The ipc namespace the binderfs mount belongs to. @@ -55,13 +75,16 @@ static DEFINE_IDA(binderfs_minors); * created. * @root_gid: gid that needs to be used when a new binder device is * created. + * @mount_opts: The mount options in use. + * @device_count: The current number of allocated binder devices. */ struct binderfs_info { struct ipc_namespace *ipc_ns; struct dentry *control_dentry; kuid_t root_uid; kgid_t root_gid; - + struct binderfs_mount_opts mount_opts; + int device_count; }; static inline struct binderfs_info *BINDERFS_I(const struct inode *inode) @@ -84,7 +107,7 @@ bool is_binderfs_device(const struct inode *inode) * @userp: buffer to copy information about new device for userspace to * @req: struct binderfs_device as copied from userspace * - * This function allocated a new binder_device and reserves a new minor + * This function allocates a new binder_device and reserves a new minor * number for it. * Minor numbers are limited and tracked globally in binderfs_minors. The * function will stash a struct binder_device for the specific binder @@ -100,20 +123,34 @@ static int binderfs_binder_device_create(struct inode *ref_inode, struct binderfs_device *req) { int minor, ret; - struct dentry *dentry, *dup, *root; + struct dentry *dentry, *root; struct binder_device *device; - size_t name_len = BINDERFS_MAX_NAME + 1; char *name = NULL; + size_t name_len; struct inode *inode = NULL; struct super_block *sb = ref_inode->i_sb; struct binderfs_info *info = sb->s_fs_info; +#if defined(CONFIG_IPC_NS) + bool use_reserve = (info->ipc_ns == &init_ipc_ns); +#else + bool use_reserve = true; +#endif /* Reserve new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); - minor = ida_alloc_max(&binderfs_minors, BINDERFS_MAX_MINOR, GFP_KERNEL); - mutex_unlock(&binderfs_minors_mutex); - if (minor < 0) + if (++info->device_count <= info->mount_opts.max) + minor = ida_alloc_max(&binderfs_minors, + use_reserve ? BINDERFS_MAX_MINOR : + BINDERFS_MAX_MINOR_CAPPED, + GFP_KERNEL); + else + minor = -ENOSPC; + if (minor < 0) { + --info->device_count; + mutex_unlock(&binderfs_minors_mutex); return minor; + } + mutex_unlock(&binderfs_minors_mutex); ret = -ENOMEM; device = kzalloc(sizeof(*device), GFP_KERNEL); @@ -132,12 +169,13 @@ static int binderfs_binder_device_create(struct inode *ref_inode, inode->i_uid = info->root_uid; inode->i_gid = info->root_gid; - name = kmalloc(name_len, GFP_KERNEL); + req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */ + name_len = strlen(req->name); + /* Make sure to include terminating NUL byte */ + name = kmemdup(req->name, name_len + 1, GFP_KERNEL); if (!name) goto err; - strscpy(name, req->name, name_len); - device->binderfs_inode = inode; device->context.binder_context_mgr_uid = INVALID_UID; device->context.name = name; @@ -156,28 +194,25 @@ static int binderfs_binder_device_create(struct inode *ref_inode, root = sb->s_root; inode_lock(d_inode(root)); - dentry = d_alloc_name(root, name); - if (!dentry) { + + /* look it up */ + dentry = lookup_one_len(name, root, name_len); + if (IS_ERR(dentry)) { inode_unlock(d_inode(root)); - ret = -ENOMEM; + ret = PTR_ERR(dentry); goto err; } - /* Verify that the name userspace gave us is not already in use. */ - dup = d_lookup(root, &dentry->d_name); - if (dup) { - if (d_really_is_positive(dup)) { - dput(dup); - dput(dentry); - inode_unlock(d_inode(root)); - ret = -EEXIST; - goto err; - } - dput(dup); + if (d_really_is_positive(dentry)) { + /* already exists */ + dput(dentry); + inode_unlock(d_inode(root)); + ret = -EEXIST; + goto err; } inode->i_private = device; - d_add(dentry, inode); + d_instantiate(dentry, inode); fsnotify_create(root->d_inode, dentry); inode_unlock(d_inode(root)); @@ -187,6 +222,7 @@ err: kfree(name); kfree(device); mutex_lock(&binderfs_minors_mutex); + --info->device_count; ida_free(&binderfs_minors, minor); mutex_unlock(&binderfs_minors_mutex); iput(inode); @@ -232,6 +268,7 @@ static long binder_ctl_ioctl(struct file *file, unsigned int cmd, static void binderfs_evict_inode(struct inode *inode) { struct binder_device *device = inode->i_private; + struct binderfs_info *info = BINDERFS_I(inode); clear_inode(inode); @@ -239,6 +276,7 @@ static void binderfs_evict_inode(struct inode *inode) return; mutex_lock(&binderfs_minors_mutex); + --info->device_count; ida_free(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); @@ -246,43 +284,87 @@ static void binderfs_evict_inode(struct inode *inode) kfree(device); } +/** + * binderfs_parse_mount_opts - parse binderfs mount options + * @data: options to set (can be NULL in which case defaults are used) + */ +static int binderfs_parse_mount_opts(char *data, + struct binderfs_mount_opts *opts) +{ + char *p; + opts->max = BINDERFS_MAX_MINOR; + + while ((p = strsep(&data, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + int token; + int max_devices; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_max: + if (match_int(&args[0], &max_devices) || + (max_devices < 0 || + (max_devices > BINDERFS_MAX_MINOR))) + return -EINVAL; + + opts->max = max_devices; + break; + default: + pr_err("Invalid mount options\n"); + return -EINVAL; + } + } + + return 0; +} + +static int binderfs_remount(struct super_block *sb, int *flags, char *data) +{ + struct binderfs_info *info = sb->s_fs_info; + return binderfs_parse_mount_opts(data, &info->mount_opts); +} + +static int binderfs_show_mount_opts(struct seq_file *seq, struct dentry *root) +{ + struct binderfs_info *info; + + info = root->d_sb->s_fs_info; + if (info->mount_opts.max <= BINDERFS_MAX_MINOR) + seq_printf(seq, ",max=%d", info->mount_opts.max); + + return 0; +} + static const struct super_operations binderfs_super_ops = { - .statfs = simple_statfs, - .evict_inode = binderfs_evict_inode, + .evict_inode = binderfs_evict_inode, + .remount_fs = binderfs_remount, + .show_options = binderfs_show_mount_opts, + .statfs = simple_statfs, }; +static inline bool is_binderfs_control_device(const struct dentry *dentry) +{ + struct binderfs_info *info = dentry->d_sb->s_fs_info; + return info->control_dentry == dentry; +} + static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { - struct inode *inode = d_inode(old_dentry); - - /* binderfs doesn't support directories. */ - if (d_is_dir(old_dentry)) + if (is_binderfs_control_device(old_dentry) || + is_binderfs_control_device(new_dentry)) return -EPERM; - if (flags & ~RENAME_NOREPLACE) - return -EINVAL; - - if (!simple_empty(new_dentry)) - return -ENOTEMPTY; - - if (d_really_is_positive(new_dentry)) - simple_unlink(new_dir, new_dentry); - - old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = - new_dir->i_mtime = inode->i_ctime = current_time(old_dir); - - return 0; + return simple_rename(old_dir, old_dentry, new_dir, new_dentry, flags); } static int binderfs_unlink(struct inode *dir, struct dentry *dentry) { - /* - * The control dentry is only ever touched during mount so checking it - * here should not require us to take lock. - */ - if (BINDERFS_I(dir)->control_dentry == dentry) + if (is_binderfs_control_device(dentry)) return -EPERM; return simple_unlink(dir, dentry); @@ -318,8 +400,6 @@ static int binderfs_binder_ctl_create(struct super_block *sb) if (!device) return -ENOMEM; - inode_lock(d_inode(root)); - /* If we have already created a binder-control node, return. */ if (info->control_dentry) { ret = 0; @@ -358,12 +438,10 @@ static int binderfs_binder_ctl_create(struct super_block *sb) inode->i_private = device; info->control_dentry = dentry; d_add(dentry, inode); - inode_unlock(d_inode(root)); return 0; out: - inode_unlock(d_inode(root)); kfree(device); iput(inode); @@ -378,12 +456,9 @@ static const struct inode_operations binderfs_dir_inode_operations = { static int binderfs_fill_super(struct super_block *sb, void *data, int silent) { + int ret; struct binderfs_info *info; - int ret = -ENOMEM; struct inode *inode = NULL; - struct ipc_namespace *ipc_ns = sb->s_fs_info; - - get_ipc_ns(ipc_ns); sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; @@ -405,11 +480,17 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &binderfs_super_ops; sb->s_time_gran = 1; - info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); - if (!info) - goto err_without_dentry; + sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); + if (!sb->s_fs_info) + return -ENOMEM; + info = sb->s_fs_info; + + info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns); + + ret = binderfs_parse_mount_opts(data, &info->mount_opts); + if (ret) + return ret; - info->ipc_ns = ipc_ns; info->root_gid = make_kgid(sb->s_user_ns, 0); if (!gid_valid(info->root_gid)) info->root_gid = GLOBAL_ROOT_GID; @@ -417,11 +498,9 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) if (!uid_valid(info->root_uid)) info->root_uid = GLOBAL_ROOT_UID; - sb->s_fs_info = info; - inode = new_inode(sb); if (!inode) - goto err_without_dentry; + return -ENOMEM; inode->i_ino = FIRST_INODE; inode->i_fop = &simple_dir_operations; @@ -432,79 +511,28 @@ static int binderfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_root = d_make_root(inode); if (!sb->s_root) - goto err_without_dentry; + return -ENOMEM; - ret = binderfs_binder_ctl_create(sb); - if (ret) - goto err_with_dentry; - - return 0; - -err_with_dentry: - dput(sb->s_root); - sb->s_root = NULL; - -err_without_dentry: - put_ipc_ns(ipc_ns); - iput(inode); - kfree(info); - - return ret; -} - -static int binderfs_test_super(struct super_block *sb, void *data) -{ - struct binderfs_info *info = sb->s_fs_info; - - if (info) - return info->ipc_ns == data; - - return 0; -} - -static int binderfs_set_super(struct super_block *sb, void *data) -{ - sb->s_fs_info = data; - return set_anon_super(sb, NULL); + return binderfs_binder_ctl_create(sb); } static struct dentry *binderfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct super_block *sb; - struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - - if (!ns_capable(ipc_ns->user_ns, CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); - - sb = sget_userns(fs_type, binderfs_test_super, binderfs_set_super, - flags, ipc_ns->user_ns, ipc_ns); - if (IS_ERR(sb)) - return ERR_CAST(sb); - - if (!sb->s_root) { - int ret = binderfs_fill_super(sb, data, flags & SB_SILENT ? 1 : 0); - if (ret) { - deactivate_locked_super(sb); - return ERR_PTR(ret); - } - - sb->s_flags |= SB_ACTIVE; - } - - return dget(sb->s_root); + return mount_nodev(fs_type, flags, data, binderfs_fill_super); } static void binderfs_kill_super(struct super_block *sb) { struct binderfs_info *info = sb->s_fs_info; + kill_litter_super(sb); + if (info && info->ipc_ns) put_ipc_ns(info->ipc_ns); kfree(info); - kill_litter_super(sb); } static struct file_system_type binder_fs_type = { @@ -530,14 +558,6 @@ static int __init init_binderfs(void) return ret; } - binderfs_mnt = kern_mount(&binder_fs_type); - if (IS_ERR(binderfs_mnt)) { - ret = PTR_ERR(binderfs_mnt); - binderfs_mnt = NULL; - unregister_filesystem(&binder_fs_type); - unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR); - } - return ret; } diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 4ca7a6b4eaae..8218db17ebdb 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -1091,7 +1091,7 @@ comment "Generic fallback / legacy drivers" config PATA_ACPI tristate "ACPI firmware driver for PATA" - depends on ATA_ACPI && ATA_BMDMA + depends on ATA_ACPI && ATA_BMDMA && PCI help This option enables an ACPI method driver which drives motherboard PATA controller interfaces through the ACPI diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index 8cc9c429ad95..9e7fc302430f 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -915,6 +915,10 @@ static struct scsi_host_template pata_macio_sht = { .sg_tablesize = MAX_DCMDS, /* We may not need that strict one */ .dma_boundary = ATA_DMA_BOUNDARY, + /* Not sure what the real max is but we know it's less than 64K, let's + * use 64K minus 256 + */ + .max_segment_size = MAX_DBDMA_SEG, .slave_configure = pata_macio_slave_config, }; @@ -1044,11 +1048,6 @@ static int pata_macio_common_init(struct pata_macio_priv *priv, /* Make sure we have sane initial timings in the cache */ pata_macio_default_timings(priv); - /* Not sure what the real max is but we know it's less than 64K, let's - * use 64K minus 256 - */ - dma_set_max_seg_size(priv->dev, MAX_DBDMA_SEG); - /* Allocate libata host for 1 port */ memset(&pinfo, 0, sizeof(struct ata_port_info)); pmac_macio_calc_timing_masks(priv, &pinfo); diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c index e0bcf9b2dab0..174e84ce4379 100644 --- a/drivers/ata/sata_inic162x.c +++ b/drivers/ata/sata_inic162x.c @@ -245,8 +245,15 @@ struct inic_port_priv { static struct scsi_host_template inic_sht = { ATA_BASE_SHT(DRV_NAME), - .sg_tablesize = LIBATA_MAX_PRD, /* maybe it can be larger? */ - .dma_boundary = INIC_DMA_BOUNDARY, + .sg_tablesize = LIBATA_MAX_PRD, /* maybe it can be larger? */ + + /* + * This controller is braindamaged. dma_boundary is 0xffff like others + * but it will lock up the whole machine HARD if 65536 byte PRD entry + * is fed. Reduce maximum segment size. + */ + .dma_boundary = INIC_DMA_BOUNDARY, + .max_segment_size = 65536 - 512, }; static const int scr_map[] = { @@ -868,17 +875,6 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return rc; } - /* - * This controller is braindamaged. dma_boundary is 0xffff - * like others but it will lock up the whole machine HARD if - * 65536 byte PRD entry is fed. Reduce maximum segment size. - */ - rc = dma_set_max_seg_size(&pdev->dev, 65536 - 512); - if (rc) { - dev_err(&pdev->dev, "failed to set the maximum segment size\n"); - return rc; - } - rc = init_controller(hpriv->mmio_base, hpriv->cached_hctl); if (rc) { dev_err(&pdev->dev, "failed to initialize controller\n"); diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 2e9d1cfe3aeb..211607986134 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -718,7 +718,7 @@ static int he_init_cs_block_rcm(struct he_dev *he_dev) instead of '/ 512', use '>> 9' to prevent a call to divdu3 on x86 platforms */ - rate_cps = (unsigned long long) (1 << exp) * (man + 512) >> 9; + rate_cps = (unsigned long long) (1UL << exp) * (man + 512) >> 9; if (rate_cps < 10) rate_cps = 10; /* 2.2.1 minimum payload rate is 10 cps */ diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 1bd1145ad8b5..330c1f7e9665 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -108,6 +108,9 @@ static void regmap_irq_sync_unlock(struct irq_data *data) * suppress pointless writes. */ for (i = 0; i < d->chip->num_regs; i++) { + if (!d->chip->mask_base) + continue; + reg = d->chip->mask_base + (i * map->reg_stride * d->irq_reg_stride); if (d->chip->mask_invert) { @@ -258,7 +261,7 @@ static int regmap_irq_set_type(struct irq_data *data, unsigned int type) const struct regmap_irq_type *t = &irq_data->type; if ((t->types_supported & type) != type) - return -ENOTSUPP; + return 0; reg = t->type_reg_offset / map->reg_stride; @@ -588,6 +591,9 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, /* Mask all the interrupts by default */ for (i = 0; i < chip->num_regs; i++) { d->mask_buf[i] = d->mask_buf_def[i]; + if (!chip->mask_base) + continue; + reg = chip->mask_base + (i * map->reg_stride * d->irq_reg_stride); if (chip->mask_invert) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 08696f5f00bb..7c9a949e876b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -288,9 +288,10 @@ static void nbd_size_update(struct nbd_device *nbd) blk_queue_physical_block_size(nbd->disk->queue, config->blksize); set_capacity(nbd->disk, config->bytesize >> 9); if (bdev) { - if (bdev->bd_disk) + if (bdev->bd_disk) { bd_set_size(bdev, config->bytesize); - else + set_blocksize(bdev, config->blksize); + } else bdev->bd_invalidated = 1; bdput(bdev); } diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index a74ce885b541..c518659b4d9f 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -32,6 +32,7 @@ #include #include #include +#include #define IPMI_DRIVER_VERSION "39.2" @@ -62,7 +63,8 @@ static void ipmi_debug_msg(const char *title, unsigned char *data, { } #endif -static int initialized; +static bool initialized; +static bool drvregistered; enum ipmi_panic_event_op { IPMI_SEND_PANIC_EVENT_NONE, @@ -612,7 +614,7 @@ static DEFINE_MUTEX(ipmidriver_mutex); static LIST_HEAD(ipmi_interfaces); static DEFINE_MUTEX(ipmi_interfaces_mutex); -DEFINE_STATIC_SRCU(ipmi_interfaces_srcu); +struct srcu_struct ipmi_interfaces_srcu; /* * List of watchers that want to know when smi's are added and deleted. @@ -720,7 +722,15 @@ struct watcher_entry { int ipmi_smi_watcher_register(struct ipmi_smi_watcher *watcher) { struct ipmi_smi *intf; - int index; + int index, rv; + + /* + * Make sure the driver is actually initialized, this handles + * problems with initialization order. + */ + rv = ipmi_init_msghandler(); + if (rv) + return rv; mutex_lock(&smi_watchers_mutex); @@ -884,7 +894,7 @@ static int deliver_response(struct ipmi_smi *intf, struct ipmi_recv_msg *msg) if (user) { user->handler->ipmi_recv_hndl(msg, user->handler_data); - release_ipmi_user(msg->user, index); + release_ipmi_user(user, index); } else { /* User went away, give up. */ ipmi_free_recv_msg(msg); @@ -1076,7 +1086,7 @@ int ipmi_create_user(unsigned int if_num, { unsigned long flags; struct ipmi_user *new_user; - int rv = 0, index; + int rv, index; struct ipmi_smi *intf; /* @@ -1094,18 +1104,9 @@ int ipmi_create_user(unsigned int if_num, * Make sure the driver is actually initialized, this handles * problems with initialization order. */ - if (!initialized) { - rv = ipmi_init_msghandler(); - if (rv) - return rv; - - /* - * The init code doesn't return an error if it was turned - * off, but it won't initialize. Check that. - */ - if (!initialized) - return -ENODEV; - } + rv = ipmi_init_msghandler(); + if (rv) + return rv; new_user = kmalloc(sizeof(*new_user), GFP_KERNEL); if (!new_user) @@ -1183,6 +1184,7 @@ EXPORT_SYMBOL(ipmi_get_smi_info); static void free_user(struct kref *ref) { struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount); + cleanup_srcu_struct(&user->release_barrier); kfree(user); } @@ -1259,7 +1261,6 @@ int ipmi_destroy_user(struct ipmi_user *user) { _ipmi_destroy_user(user); - cleanup_srcu_struct(&user->release_barrier); kref_put(&user->refcount, free_user); return 0; @@ -1298,10 +1299,12 @@ int ipmi_set_my_address(struct ipmi_user *user, if (!user) return -ENODEV; - if (channel >= IPMI_MAX_CHANNELS) + if (channel >= IPMI_MAX_CHANNELS) { rv = -EINVAL; - else + } else { + channel = array_index_nospec(channel, IPMI_MAX_CHANNELS); user->intf->addrinfo[channel].address = address; + } release_ipmi_user(user, index); return rv; @@ -1318,10 +1321,12 @@ int ipmi_get_my_address(struct ipmi_user *user, if (!user) return -ENODEV; - if (channel >= IPMI_MAX_CHANNELS) + if (channel >= IPMI_MAX_CHANNELS) { rv = -EINVAL; - else + } else { + channel = array_index_nospec(channel, IPMI_MAX_CHANNELS); *address = user->intf->addrinfo[channel].address; + } release_ipmi_user(user, index); return rv; @@ -1338,10 +1343,12 @@ int ipmi_set_my_LUN(struct ipmi_user *user, if (!user) return -ENODEV; - if (channel >= IPMI_MAX_CHANNELS) + if (channel >= IPMI_MAX_CHANNELS) { rv = -EINVAL; - else + } else { + channel = array_index_nospec(channel, IPMI_MAX_CHANNELS); user->intf->addrinfo[channel].lun = LUN & 0x3; + } release_ipmi_user(user, index); return rv; @@ -1358,10 +1365,12 @@ int ipmi_get_my_LUN(struct ipmi_user *user, if (!user) return -ENODEV; - if (channel >= IPMI_MAX_CHANNELS) + if (channel >= IPMI_MAX_CHANNELS) { rv = -EINVAL; - else + } else { + channel = array_index_nospec(channel, IPMI_MAX_CHANNELS); *address = user->intf->addrinfo[channel].lun; + } release_ipmi_user(user, index); return rv; @@ -2184,6 +2193,7 @@ static int check_addr(struct ipmi_smi *intf, { if (addr->channel >= IPMI_MAX_CHANNELS) return -EINVAL; + addr->channel = array_index_nospec(addr->channel, IPMI_MAX_CHANNELS); *lun = intf->addrinfo[addr->channel].lun; *saddr = intf->addrinfo[addr->channel].address; return 0; @@ -3291,17 +3301,9 @@ int ipmi_register_smi(const struct ipmi_smi_handlers *handlers, * Make sure the driver is actually initialized, this handles * problems with initialization order. */ - if (!initialized) { - rv = ipmi_init_msghandler(); - if (rv) - return rv; - /* - * The init code doesn't return an error if it was turned - * off, but it won't initialize. Check that. - */ - if (!initialized) - return -ENODEV; - } + rv = ipmi_init_msghandler(); + if (rv) + return rv; intf = kzalloc(sizeof(*intf), GFP_KERNEL); if (!intf) @@ -5017,6 +5019,22 @@ static int panic_event(struct notifier_block *this, return NOTIFY_DONE; } +/* Must be called with ipmi_interfaces_mutex held. */ +static int ipmi_register_driver(void) +{ + int rv; + + if (drvregistered) + return 0; + + rv = driver_register(&ipmidriver.driver); + if (rv) + pr_err("Could not register IPMI driver\n"); + else + drvregistered = true; + return rv; +} + static struct notifier_block panic_block = { .notifier_call = panic_event, .next = NULL, @@ -5027,66 +5045,75 @@ static int ipmi_init_msghandler(void) { int rv; + mutex_lock(&ipmi_interfaces_mutex); + rv = ipmi_register_driver(); + if (rv) + goto out; if (initialized) - return 0; + goto out; - rv = driver_register(&ipmidriver.driver); - if (rv) { - pr_err("Could not register IPMI driver\n"); - return rv; - } - - pr_info("version " IPMI_DRIVER_VERSION "\n"); + init_srcu_struct(&ipmi_interfaces_srcu); timer_setup(&ipmi_timer, ipmi_timeout, 0); mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); atomic_notifier_chain_register(&panic_notifier_list, &panic_block); - initialized = 1; + initialized = true; - return 0; +out: + mutex_unlock(&ipmi_interfaces_mutex); + return rv; } static int __init ipmi_init_msghandler_mod(void) { - ipmi_init_msghandler(); - return 0; + int rv; + + pr_info("version " IPMI_DRIVER_VERSION "\n"); + + mutex_lock(&ipmi_interfaces_mutex); + rv = ipmi_register_driver(); + mutex_unlock(&ipmi_interfaces_mutex); + + return rv; } static void __exit cleanup_ipmi(void) { int count; - if (!initialized) - return; + if (initialized) { + atomic_notifier_chain_unregister(&panic_notifier_list, + &panic_block); - atomic_notifier_chain_unregister(&panic_notifier_list, &panic_block); + /* + * This can't be called if any interfaces exist, so no worry + * about shutting down the interfaces. + */ - /* - * This can't be called if any interfaces exist, so no worry - * about shutting down the interfaces. - */ + /* + * Tell the timer to stop, then wait for it to stop. This + * avoids problems with race conditions removing the timer + * here. + */ + atomic_inc(&stop_operation); + del_timer_sync(&ipmi_timer); - /* - * Tell the timer to stop, then wait for it to stop. This - * avoids problems with race conditions removing the timer - * here. - */ - atomic_inc(&stop_operation); - del_timer_sync(&ipmi_timer); + initialized = false; - driver_unregister(&ipmidriver.driver); + /* Check for buffer leaks. */ + count = atomic_read(&smi_msg_inuse_count); + if (count != 0) + pr_warn("SMI message count %d at exit\n", count); + count = atomic_read(&recv_msg_inuse_count); + if (count != 0) + pr_warn("recv message count %d at exit\n", count); - initialized = 0; - - /* Check for buffer leaks. */ - count = atomic_read(&smi_msg_inuse_count); - if (count != 0) - pr_warn("SMI message count %d at exit\n", count); - count = atomic_read(&recv_msg_inuse_count); - if (count != 0) - pr_warn("recv message count %d at exit\n", count); + cleanup_srcu_struct(&ipmi_interfaces_srcu); + } + if (drvregistered) + driver_unregister(&ipmidriver.driver); } module_exit(cleanup_ipmi); diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index ca9528c4f183..b7a1ae2afaea 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -632,8 +632,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, /* Remove the multi-part read marker. */ len -= 2; + data += 2; for (i = 0; i < len; i++) - ssif_info->data[i] = data[i+2]; + ssif_info->data[i] = data[i]; ssif_info->multi_len = len; ssif_info->multi_pos = 1; @@ -661,8 +662,19 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, } blocknum = data[0]; + len--; + data++; - if (ssif_info->multi_len + len - 1 > IPMI_MAX_MSG_LENGTH) { + if (blocknum != 0xff && len != 31) { + /* All blocks but the last must have 31 data bytes. */ + result = -EIO; + if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) + pr_info("Received middle message <31\n"); + + goto continue_op; + } + + if (ssif_info->multi_len + len > IPMI_MAX_MSG_LENGTH) { /* Received message too big, abort the operation. */ result = -E2BIG; if (ssif_info->ssif_debug & SSIF_DEBUG_MSG) @@ -671,16 +683,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, goto continue_op; } - /* Remove the blocknum from the data. */ - len--; for (i = 0; i < len; i++) - ssif_info->data[i + ssif_info->multi_len] = data[i + 1]; + ssif_info->data[i + ssif_info->multi_len] = data[i]; ssif_info->multi_len += len; if (blocknum == 0xff) { /* End of read */ len = ssif_info->multi_len; data = ssif_info->data; - } else if (blocknum + 1 != ssif_info->multi_pos) { + } else if (blocknum != ssif_info->multi_pos) { /* * Out of sequence block, just abort. Block * numbers start at zero for the second block, @@ -707,6 +717,7 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, } } + continue_op: if (result < 0) { ssif_inc_stat(ssif_info, receive_errors); } else { @@ -714,8 +725,6 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, ssif_inc_stat(ssif_info, received_message_parts); } - - continue_op: if (ssif_info->ssif_debug & SSIF_DEBUG_STATE) pr_info("DONE 1: state = %d, result=%d\n", ssif_info->ssif_state, result); diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c index b5e3103c1175..e43c876a9223 100644 --- a/drivers/char/mwave/mwavedd.c +++ b/drivers/char/mwave/mwavedd.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "smapi.h" #include "mwavedd.h" #include "3780i.h" @@ -289,6 +290,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); PRINTK_3(TRACE_MWAVE, "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" " ipcnum %x entry usIntCount %x\n", @@ -317,6 +320,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, " Invalid ipcnum %x\n", ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); PRINTK_3(TRACE_MWAVE, "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC" " ipcnum %x, usIntCount %x\n", @@ -383,6 +388,8 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, ipcnum); return -EINVAL; } + ipcnum = array_index_nospec(ipcnum, + ARRAY_SIZE(pDrvData->IPCs)); mutex_lock(&mwave_mutex); if (pDrvData->IPCs[ipcnum].bIsEnabled == true) { pDrvData->IPCs[ipcnum].bIsEnabled = false; diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index e5b2fe80eab4..d2f0bb5ba47e 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -293,7 +293,6 @@ config COMMON_CLK_BD718XX source "drivers/clk/actions/Kconfig" source "drivers/clk/bcm/Kconfig" source "drivers/clk/hisilicon/Kconfig" -source "drivers/clk/imx/Kconfig" source "drivers/clk/imgtec/Kconfig" source "drivers/clk/imx/Kconfig" source "drivers/clk/ingenic/Kconfig" diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c index 5b393e711e94..7d16ab0784ec 100644 --- a/drivers/clk/clk-versaclock5.c +++ b/drivers/clk/clk-versaclock5.c @@ -262,8 +262,10 @@ static int vc5_mux_set_parent(struct clk_hw *hw, u8 index) if (vc5->clk_mux_ins == VC5_MUX_IN_XIN) src = VC5_PRIM_SRC_SHDN_EN_XTAL; - if (vc5->clk_mux_ins == VC5_MUX_IN_CLKIN) + else if (vc5->clk_mux_ins == VC5_MUX_IN_CLKIN) src = VC5_PRIM_SRC_SHDN_EN_CLKIN; + else /* Invalid; should have been caught by vc5_probe() */ + return -EINVAL; } return regmap_update_bits(vc5->regmap, VC5_PRIM_SRC_SHDN, mask, src); diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 75d13c0eff12..6ccdbedb02f3 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2779,7 +2779,7 @@ static void clk_dump_one(struct seq_file *s, struct clk_core *c, int level) seq_printf(s, "\"protect_count\": %d,", c->protect_count); seq_printf(s, "\"rate\": %lu,", clk_core_get_rate(c)); seq_printf(s, "\"accuracy\": %lu,", clk_core_get_accuracy(c)); - seq_printf(s, "\"phase\": %d", clk_core_get_phase(c)); + seq_printf(s, "\"phase\": %d,", clk_core_get_phase(c)); seq_printf(s, "\"duty_cycle\": %u", clk_core_get_scaled_duty_cycle(c, 100000)); } diff --git a/drivers/clk/imx/clk-imx8qxp-lpcg.c b/drivers/clk/imx/clk-imx8qxp-lpcg.c index 99c2508de8e5..fb6edf1b8aa2 100644 --- a/drivers/clk/imx/clk-imx8qxp-lpcg.c +++ b/drivers/clk/imx/clk-imx8qxp-lpcg.c @@ -169,6 +169,8 @@ static int imx8qxp_lpcg_clk_probe(struct platform_device *pdev) return -ENODEV; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; base = devm_ioremap(dev, res->start, resource_size(res)); if (!base) return -ENOMEM; diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig index 1b1ba54e33dd..1c04575c118f 100644 --- a/drivers/clk/qcom/Kconfig +++ b/drivers/clk/qcom/Kconfig @@ -215,6 +215,7 @@ config MSM_MMCC_8996 config MSM_GCC_8998 tristate "MSM8998 Global Clock Controller" + select QCOM_GDSC help Support for the global clock controller on msm8998 devices. Say Y if you want to use peripheral devices such as UART, SPI, diff --git a/drivers/clk/socfpga/clk-pll-s10.c b/drivers/clk/socfpga/clk-pll-s10.c index 2d5d8b43727e..c4d0b6f6abf2 100644 --- a/drivers/clk/socfpga/clk-pll-s10.c +++ b/drivers/clk/socfpga/clk-pll-s10.c @@ -43,7 +43,7 @@ static unsigned long clk_pll_recalc_rate(struct clk_hw *hwclk, /* Read mdiv and fdiv from the fdbck register */ reg = readl(socfpgaclk->hw.reg + 0x4); mdiv = (reg & SOCFPGA_PLL_MDIV_MASK) >> SOCFPGA_PLL_MDIV_SHIFT; - vco_freq = (unsigned long long)parent_rate * (mdiv + 6); + vco_freq = (unsigned long long)vco_freq * (mdiv + 6); return (unsigned long)vco_freq; } diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c index 5b238fc314ac..8281dfbf38c2 100644 --- a/drivers/clk/socfpga/clk-s10.c +++ b/drivers/clk/socfpga/clk-s10.c @@ -12,17 +12,17 @@ #include "stratix10-clk.h" -static const char * const pll_mux[] = { "osc1", "cb_intosc_hs_div2_clk", - "f2s_free_clk",}; +static const char * const pll_mux[] = { "osc1", "cb-intosc-hs-div2-clk", + "f2s-free-clk",}; static const char * const cntr_mux[] = { "main_pll", "periph_pll", - "osc1", "cb_intosc_hs_div2_clk", - "f2s_free_clk"}; -static const char * const boot_mux[] = { "osc1", "cb_intosc_hs_div2_clk",}; + "osc1", "cb-intosc-hs-div2-clk", + "f2s-free-clk"}; +static const char * const boot_mux[] = { "osc1", "cb-intosc-hs-div2-clk",}; static const char * const noc_free_mux[] = {"main_noc_base_clk", "peri_noc_base_clk", - "osc1", "cb_intosc_hs_div2_clk", - "f2s_free_clk"}; + "osc1", "cb-intosc-hs-div2-clk", + "f2s-free-clk"}; static const char * const emaca_free_mux[] = {"peri_emaca_clk", "boot_clk"}; static const char * const emacb_free_mux[] = {"peri_emacb_clk", "boot_clk"}; @@ -33,14 +33,14 @@ static const char * const s2f_usr1_free_mux[] = {"peri_s2f_usr1_clk", "boot_clk" static const char * const psi_ref_free_mux[] = {"peri_psi_ref_clk", "boot_clk"}; static const char * const mpu_mux[] = { "mpu_free_clk", "boot_clk",}; -static const char * const s2f_usr0_mux[] = {"f2s_free_clk", "boot_clk"}; +static const char * const s2f_usr0_mux[] = {"f2s-free-clk", "boot_clk"}; static const char * const emac_mux[] = {"emaca_free_clk", "emacb_free_clk"}; static const char * const noc_mux[] = {"noc_free_clk", "boot_clk"}; static const char * const mpu_free_mux[] = {"main_mpu_base_clk", "peri_mpu_base_clk", - "osc1", "cb_intosc_hs_div2_clk", - "f2s_free_clk"}; + "osc1", "cb-intosc-hs-div2-clk", + "f2s-free-clk"}; /* clocks in AO (always on) controller */ static const struct stratix10_pll_clock s10_pll_clks[] = { diff --git a/drivers/clk/tegra/clk-tegra124-dfll-fcpu.c b/drivers/clk/tegra/clk-tegra124-dfll-fcpu.c index 269d3595758b..edc31bb56674 100644 --- a/drivers/clk/tegra/clk-tegra124-dfll-fcpu.c +++ b/drivers/clk/tegra/clk-tegra124-dfll-fcpu.c @@ -133,9 +133,11 @@ static int tegra124_dfll_fcpu_remove(struct platform_device *pdev) struct tegra_dfll_soc_data *soc; soc = tegra_dfll_unregister(pdev); - if (IS_ERR(soc)) + if (IS_ERR(soc)) { dev_err(&pdev->dev, "failed to unregister DFLL: %ld\n", PTR_ERR(soc)); + return PTR_ERR(soc); + } tegra_cvb_remove_opp_table(soc->dev, soc->cvb, soc->max_freq); diff --git a/drivers/clk/zynqmp/clkc.c b/drivers/clk/zynqmp/clkc.c index f65cc0ff76ab..b0908ec62f73 100644 --- a/drivers/clk/zynqmp/clkc.c +++ b/drivers/clk/zynqmp/clkc.c @@ -669,8 +669,8 @@ static int zynqmp_clk_setup(struct device_node *np) if (ret) return ret; - zynqmp_data = kzalloc(sizeof(*zynqmp_data) + sizeof(*zynqmp_data) * - clock_max_idx, GFP_KERNEL); + zynqmp_data = kzalloc(struct_size(zynqmp_data, hws, clock_max_idx), + GFP_KERNEL); if (!zynqmp_data) return -ENOMEM; diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 5a90075f719d..0be55fcc19ba 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -692,6 +692,7 @@ config CRYPTO_DEV_BCM_SPU depends on ARCH_BCM_IPROC depends on MAILBOX default m + select CRYPTO_AUTHENC select CRYPTO_DES select CRYPTO_MD5 select CRYPTO_SHA1 diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index c9393ffb70ed..5567cbda2798 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -2845,44 +2845,28 @@ static int aead_authenc_setkey(struct crypto_aead *cipher, struct spu_hw *spu = &iproc_priv.spu; struct iproc_ctx_s *ctx = crypto_aead_ctx(cipher); struct crypto_tfm *tfm = crypto_aead_tfm(cipher); - struct rtattr *rta = (void *)key; - struct crypto_authenc_key_param *param; - const u8 *origkey = key; - const unsigned int origkeylen = keylen; - - int ret = 0; + struct crypto_authenc_keys keys; + int ret; flow_log("%s() aead:%p key:%p keylen:%u\n", __func__, cipher, key, keylen); flow_dump(" key: ", key, keylen); - if (!RTA_OK(rta, keylen)) - goto badkey; - if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) - goto badkey; - if (RTA_PAYLOAD(rta) < sizeof(*param)) + ret = crypto_authenc_extractkeys(&keys, key, keylen); + if (ret) goto badkey; - param = RTA_DATA(rta); - ctx->enckeylen = be32_to_cpu(param->enckeylen); - - key += RTA_ALIGN(rta->rta_len); - keylen -= RTA_ALIGN(rta->rta_len); - - if (keylen < ctx->enckeylen) - goto badkey; - if (ctx->enckeylen > MAX_KEY_SIZE) + if (keys.enckeylen > MAX_KEY_SIZE || + keys.authkeylen > MAX_KEY_SIZE) goto badkey; - ctx->authkeylen = keylen - ctx->enckeylen; + ctx->enckeylen = keys.enckeylen; + ctx->authkeylen = keys.authkeylen; - if (ctx->authkeylen > MAX_KEY_SIZE) - goto badkey; - - memcpy(ctx->enckey, key + ctx->authkeylen, ctx->enckeylen); + memcpy(ctx->enckey, keys.enckey, keys.enckeylen); /* May end up padding auth key. So make sure it's zeroed. */ memset(ctx->authkey, 0, sizeof(ctx->authkey)); - memcpy(ctx->authkey, key, ctx->authkeylen); + memcpy(ctx->authkey, keys.authkey, keys.authkeylen); switch (ctx->alg->cipher_info.alg) { case CIPHER_ALG_DES: @@ -2890,7 +2874,7 @@ static int aead_authenc_setkey(struct crypto_aead *cipher, u32 tmp[DES_EXPKEY_WORDS]; u32 flags = CRYPTO_TFM_RES_WEAK_KEY; - if (des_ekey(tmp, key) == 0) { + if (des_ekey(tmp, keys.enckey) == 0) { if (crypto_aead_get_flags(cipher) & CRYPTO_TFM_REQ_WEAK_KEY) { crypto_aead_set_flags(cipher, flags); @@ -2905,7 +2889,7 @@ static int aead_authenc_setkey(struct crypto_aead *cipher, break; case CIPHER_ALG_3DES: if (ctx->enckeylen == (DES_KEY_SIZE * 3)) { - const u32 *K = (const u32 *)key; + const u32 *K = (const u32 *)keys.enckey; u32 flags = CRYPTO_TFM_RES_BAD_KEY_SCHED; if (!((K[0] ^ K[2]) | (K[1] ^ K[3])) || @@ -2956,9 +2940,7 @@ static int aead_authenc_setkey(struct crypto_aead *cipher, ctx->fallback_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; ctx->fallback_cipher->base.crt_flags |= tfm->crt_flags & CRYPTO_TFM_REQ_MASK; - ret = - crypto_aead_setkey(ctx->fallback_cipher, origkey, - origkeylen); + ret = crypto_aead_setkey(ctx->fallback_cipher, key, keylen); if (ret) { flow_log(" fallback setkey() returned:%d\n", ret); tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 92e593e2069a..80ae69f906fb 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -3476,7 +3476,7 @@ static int __init caam_algapi_init(void) * Skip algorithms requiring message digests * if MD or MD size is not supported by device. */ - if ((c2_alg_sel & ~OP_ALG_ALGSEL_SUBMASK) == 0x40 && + if (is_mdha(c2_alg_sel) && (!md_inst || t_alg->aead.maxauthsize > md_limit)) continue; diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 81712aa5d0f2..bb1a2cdf1951 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -1072,13 +1072,16 @@ static int ahash_final_no_ctx(struct ahash_request *req) desc = edesc->hw_desc; - state->buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, state->buf_dma)) { - dev_err(jrdev, "unable to map src\n"); - goto unmap; - } + if (buflen) { + state->buf_dma = dma_map_single(jrdev, buf, buflen, + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, state->buf_dma)) { + dev_err(jrdev, "unable to map src\n"); + goto unmap; + } - append_seq_in_ptr(desc, state->buf_dma, buflen, 0); + append_seq_in_ptr(desc, state->buf_dma, buflen, 0); + } edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result, digestsize); diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index ec10230178c5..4b6854bf896a 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -1155,6 +1155,7 @@ #define OP_ALG_ALGSEL_DES (0x20 << OP_ALG_ALGSEL_SHIFT) #define OP_ALG_ALGSEL_3DES (0x21 << OP_ALG_ALGSEL_SHIFT) #define OP_ALG_ALGSEL_ARC4 (0x30 << OP_ALG_ALGSEL_SHIFT) +#define OP_ALG_CHA_MDHA (0x40 << OP_ALG_ALGSEL_SHIFT) #define OP_ALG_ALGSEL_MD5 (0x40 << OP_ALG_ALGSEL_SHIFT) #define OP_ALG_ALGSEL_SHA1 (0x41 << OP_ALG_ALGSEL_SHIFT) #define OP_ALG_ALGSEL_SHA224 (0x42 << OP_ALG_ALGSEL_SHIFT) diff --git a/drivers/crypto/caam/error.h b/drivers/crypto/caam/error.h index 67ea94079837..8c6b83e02a70 100644 --- a/drivers/crypto/caam/error.h +++ b/drivers/crypto/caam/error.h @@ -7,6 +7,9 @@ #ifndef CAAM_ERROR_H #define CAAM_ERROR_H + +#include "desc.h" + #define CAAM_ERROR_STR_MAX 302 void caam_strstatus(struct device *dev, u32 status, bool qi_v2); @@ -17,4 +20,10 @@ void caam_strstatus(struct device *dev, u32 status, bool qi_v2); void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, struct scatterlist *sg, size_t tlen, bool ascii); + +static inline bool is_mdha(u32 algtype) +{ + return (algtype & OP_ALG_ALGSEL_MASK & ~OP_ALG_ALGSEL_SUBMASK) == + OP_ALG_CHA_MDHA; +} #endif /* CAAM_ERROR_H */ diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c index e34e4df8fd24..fe070d75c842 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c +++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c @@ -567,10 +567,10 @@ static void process_response_list(struct nitrox_cmdq *cmdq) /* ORH error code */ err = READ_ONCE(*sr->resp.orh) & 0xff; - softreq_destroy(sr); if (sr->callback) sr->callback(sr->cb_arg, err); + softreq_destroy(sr); req_completed++; } diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index f2643cda45db..a3527c00b29a 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -549,13 +549,12 @@ static int cc_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen) { struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct rtattr *rta = (struct rtattr *)key; struct cc_crypto_req cc_req = {}; - struct crypto_authenc_key_param *param; struct cc_hw_desc desc[MAX_AEAD_SETKEY_SEQ]; - int rc = -EINVAL; unsigned int seq_len = 0; struct device *dev = drvdata_to_dev(ctx->drvdata); + const u8 *enckey, *authkey; + int rc; dev_dbg(dev, "Setting key in context @%p for %s. key=%p keylen=%u\n", ctx, crypto_tfm_alg_name(crypto_aead_tfm(tfm)), key, keylen); @@ -563,35 +562,33 @@ static int cc_aead_setkey(struct crypto_aead *tfm, const u8 *key, /* STAT_PHASE_0: Init and sanity checks */ if (ctx->auth_mode != DRV_HASH_NULL) { /* authenc() alg. */ - if (!RTA_OK(rta, keylen)) + struct crypto_authenc_keys keys; + + rc = crypto_authenc_extractkeys(&keys, key, keylen); + if (rc) goto badkey; - if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) - goto badkey; - if (RTA_PAYLOAD(rta) < sizeof(*param)) - goto badkey; - param = RTA_DATA(rta); - ctx->enc_keylen = be32_to_cpu(param->enckeylen); - key += RTA_ALIGN(rta->rta_len); - keylen -= RTA_ALIGN(rta->rta_len); - if (keylen < ctx->enc_keylen) - goto badkey; - ctx->auth_keylen = keylen - ctx->enc_keylen; + enckey = keys.enckey; + authkey = keys.authkey; + ctx->enc_keylen = keys.enckeylen; + ctx->auth_keylen = keys.authkeylen; if (ctx->cipher_mode == DRV_CIPHER_CTR) { /* the nonce is stored in bytes at end of key */ + rc = -EINVAL; if (ctx->enc_keylen < (AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE)) goto badkey; /* Copy nonce from last 4 bytes in CTR key to * first 4 bytes in CTR IV */ - memcpy(ctx->ctr_nonce, key + ctx->auth_keylen + - ctx->enc_keylen - CTR_RFC3686_NONCE_SIZE, - CTR_RFC3686_NONCE_SIZE); + memcpy(ctx->ctr_nonce, enckey + ctx->enc_keylen - + CTR_RFC3686_NONCE_SIZE, CTR_RFC3686_NONCE_SIZE); /* Set CTR key size */ ctx->enc_keylen -= CTR_RFC3686_NONCE_SIZE; } } else { /* non-authenc - has just one key */ + enckey = key; + authkey = NULL; ctx->enc_keylen = keylen; ctx->auth_keylen = 0; } @@ -603,13 +600,14 @@ static int cc_aead_setkey(struct crypto_aead *tfm, const u8 *key, /* STAT_PHASE_1: Copy key to ctx */ /* Get key material */ - memcpy(ctx->enckey, key + ctx->auth_keylen, ctx->enc_keylen); + memcpy(ctx->enckey, enckey, ctx->enc_keylen); if (ctx->enc_keylen == 24) memset(ctx->enckey + 24, 0, CC_AES_KEY_SIZE_MAX - 24); if (ctx->auth_mode == DRV_HASH_XCBC_MAC) { - memcpy(ctx->auth_state.xcbc.xcbc_keys, key, ctx->auth_keylen); + memcpy(ctx->auth_state.xcbc.xcbc_keys, authkey, + ctx->auth_keylen); } else if (ctx->auth_mode != DRV_HASH_NULL) { /* HMAC */ - rc = cc_get_plain_hmac_key(tfm, key, ctx->auth_keylen); + rc = cc_get_plain_hmac_key(tfm, authkey, ctx->auth_keylen); if (rc) goto badkey; } diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index 59b75299fcbc..2e11b0d6a9bb 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "chtls.h" #include "chtls_cm.h" @@ -1015,6 +1016,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, { struct inet_sock *newinet; const struct iphdr *iph; + struct tls_context *ctx; struct net_device *ndev; struct chtls_sock *csk; struct dst_entry *dst; @@ -1063,6 +1065,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk, oreq->ts_recent = PASS_OPEN_TID_G(ntohl(req->tos_stid)); sk_setup_caps(newsk, dst); + ctx = tls_get_ctx(lsk); + newsk->sk_destruct = ctx->sk_destruct; csk->sk = newsk; csk->passive_reap_next = oreq; csk->tx_chan = cxgb4_port_chan(ndev); diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 45e20707cef8..f8e2c5c3f4eb 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1361,23 +1361,18 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); int max_len = is_sec1 ? TALITOS1_MAX_DATA_LEN : TALITOS2_MAX_DATA_LEN; - void *err; if (cryptlen + authsize > max_len) { dev_err(dev, "length exceeds h/w max limit\n"); return ERR_PTR(-EINVAL); } - if (ivsize) - iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE); - if (!dst || dst == src) { src_len = assoclen + cryptlen + authsize; src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); - err = ERR_PTR(-EINVAL); - goto error_sg; + return ERR_PTR(-EINVAL); } src_nents = (src_nents == 1) ? 0 : src_nents; dst_nents = dst ? src_nents : 0; @@ -1387,16 +1382,14 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); - err = ERR_PTR(-EINVAL); - goto error_sg; + return ERR_PTR(-EINVAL); } src_nents = (src_nents == 1) ? 0 : src_nents; dst_len = assoclen + cryptlen + (encrypt ? authsize : 0); dst_nents = sg_nents_for_len(dst, dst_len); if (dst_nents < 0) { dev_err(dev, "Invalid number of dst SG.\n"); - err = ERR_PTR(-EINVAL); - goto error_sg; + return ERR_PTR(-EINVAL); } dst_nents = (dst_nents == 1) ? 0 : dst_nents; } @@ -1423,11 +1416,14 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, /* if its a ahash, add space for a second desc next to the first one */ if (is_sec1 && !dst) alloc_len += sizeof(struct talitos_desc); + alloc_len += ivsize; edesc = kmalloc(alloc_len, GFP_DMA | flags); - if (!edesc) { - err = ERR_PTR(-ENOMEM); - goto error_sg; + if (!edesc) + return ERR_PTR(-ENOMEM); + if (ivsize) { + iv = memcpy(((u8 *)edesc) + alloc_len - ivsize, iv, ivsize); + iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE); } memset(&edesc->desc, 0, sizeof(edesc->desc)); @@ -1445,10 +1441,6 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, DMA_BIDIRECTIONAL); } return edesc; -error_sg: - if (iv_dma) - dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE); - return err; } static struct talitos_edesc *aead_edesc_alloc(struct aead_request *areq, u8 *iv, diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 4213cb0bb2a7..f8664bac9fa8 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -295,8 +295,8 @@ struct altr_sdram_mc_data { #define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 /* Sticky registers for Uncorrected Errors */ -#define S10_SYSMGR_UE_VAL_OFST 0x120 -#define S10_SYSMGR_UE_ADDR_OFST 0x124 +#define S10_SYSMGR_UE_VAL_OFST 0x220 +#define S10_SYSMGR_UE_ADDR_OFST 0x224 #define S10_DDR0_IRQ_MASK BIT(16) diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 09b845e90114..a785ffd5af89 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1144,10 +1144,6 @@ static int sbp2_probe(struct fw_unit *unit, const struct ieee1394_device_id *id) if (device->is_local) return -ENODEV; - if (dma_get_max_seg_size(device->card->device) > SBP2_MAX_SEG_SIZE) - WARN_ON(dma_set_max_seg_size(device->card->device, - SBP2_MAX_SEG_SIZE)); - shost = scsi_host_alloc(&scsi_driver_template, sizeof(*tgt)); if (shost == NULL) return -ENOMEM; @@ -1610,6 +1606,7 @@ static struct scsi_host_template scsi_driver_template = { .eh_abort_handler = sbp2_scsi_abort, .this_id = -1, .sg_tablesize = SG_ALL, + .max_segment_size = SBP2_MAX_SEG_SIZE, .can_queue = 1, .sdev_attrs = sbp2_scsi_sysfs_attrs, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index a028661d9e20..92b11de19581 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -576,6 +576,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = { { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX }, + { 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0, 0, 0, 0, 0 }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index dafc645b2e4e..b083b219b1a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -531,17 +531,6 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_gem_object *obj; struct amdgpu_framebuffer *amdgpu_fb; int ret; - int height; - struct amdgpu_device *adev = dev->dev_private; - int cpp = drm_format_plane_cpp(mode_cmd->pixel_format, 0); - int pitch = mode_cmd->pitches[0] / cpp; - - pitch = amdgpu_align_pitch(adev, pitch, cpp, false); - if (mode_cmd->pitches[0] != pitch) { - DRM_DEBUG_KMS("Invalid pitch: expecting %d but got %d\n", - pitch, mode_cmd->pitches[0]); - return ERR_PTR(-EINVAL); - } obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]); if (obj == NULL) { @@ -556,13 +545,6 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-EINVAL); } - height = ALIGN(mode_cmd->height, 8); - if (obj->size < pitch * height) { - DRM_DEBUG_KMS("Invalid GEM size: expecting >= %d but got %zu\n", - pitch * height, obj->size); - return ERR_PTR(-EINVAL); - } - amdgpu_fb = kzalloc(sizeof(*amdgpu_fb), GFP_KERNEL); if (amdgpu_fb == NULL) { drm_gem_object_put_unlocked(obj); diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index fbf0ee5201c3..c3613604a4f8 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -4,8 +4,8 @@ config HSA_AMD bool "HSA kernel driver for AMD GPU devices" - depends on DRM_AMDGPU && X86_64 - imply AMD_IOMMU_V2 + depends on DRM_AMDGPU && (X86_64 || ARM64) + imply AMD_IOMMU_V2 if X86_64 select MMU_NOTIFIER help Enable this if you want to use HSA features on AMD GPU devices. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index b7bc7d7d048f..5d85ff341385 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -863,6 +863,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, return 0; } +#if CONFIG_X86_64 static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, uint32_t *num_entries, struct crat_subtype_iolink *sub_type_hdr) @@ -905,6 +906,7 @@ static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, return 0; } +#endif /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU * @@ -920,7 +922,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) struct crat_subtype_generic *sub_type_hdr; int avail_size = *size; int numa_node_id; +#ifdef CONFIG_X86_64 uint32_t entries = 0; +#endif int ret = 0; if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU) @@ -982,6 +986,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) sub_type_hdr->length); /* Fill in Subtype: IO Link */ +#ifdef CONFIG_X86_64 ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size, &entries, (struct crat_subtype_iolink *)sub_type_hdr); @@ -992,6 +997,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length * entries); +#else + pr_info("IO link not available for non x86 platforms\n"); +#endif crat_table->num_domains++; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 5f5b2acedbac..09da91644f9f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1093,8 +1093,6 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) * the GPU device is not already present in the topology device * list then return NULL. This means a new topology device has to * be created for this GPU. - * TODO: Rather than assiging @gpu to first topology device withtout - * gpu attached, it will better to have more stringent check. */ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) { @@ -1102,12 +1100,20 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) struct kfd_topology_device *out_dev = NULL; down_write(&topology_lock); - list_for_each_entry(dev, &topology_device_list, list) + list_for_each_entry(dev, &topology_device_list, list) { + /* Discrete GPUs need their own topology device list + * entries. Don't assign them to CPU/APU nodes. + */ + if (!gpu->device_info->needs_iommu_device && + dev->node_props.cpu_cores_count) + continue; + if (!dev->gpu && (dev->node_props.simd_count > 0)) { dev->gpu = gpu; out_dev = dev; break; } + } up_write(&topology_lock); return out_dev; } @@ -1392,7 +1398,6 @@ int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev) static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) { - const struct cpuinfo_x86 *cpuinfo; int first_cpu_of_numa_node; if (!cpumask || cpumask == cpu_none_mask) @@ -1400,9 +1405,11 @@ static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) first_cpu_of_numa_node = cpumask_first(cpumask); if (first_cpu_of_numa_node >= nr_cpu_ids) return -1; - cpuinfo = &cpu_data(first_cpu_of_numa_node); - - return cpuinfo->apicid; +#ifdef CONFIG_X86_64 + return cpu_data(first_cpu_of_numa_node).apicid; +#else + return first_cpu_of_numa_node; +#endif } /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 34f35e9a3c46..f4fa40c387d3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1772,7 +1772,7 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) + caps.min_input_signal * 0x101; if (dc_link_set_backlight_level(dm->backlight_link, - brightness, 0, 0)) + brightness, 0)) return 0; else return 1; @@ -5933,7 +5933,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { if (!drm_atomic_crtc_needs_modeset(new_crtc_state) && !new_crtc_state->color_mgmt_changed && - !new_crtc_state->vrr_enabled) + old_crtc_state->vrr_enabled == new_crtc_state->vrr_enabled) continue; if (!new_crtc_state->enable) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 52deacf39841..b0265dbebd4c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2190,8 +2190,7 @@ int dc_link_get_backlight_level(const struct dc_link *link) bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp, - const struct dc_stream_state *stream) + uint32_t frame_ramp) { struct dc *core_dc = link->ctx->dc; struct abm *abm = core_dc->res_pool->abm; @@ -2206,10 +2205,6 @@ bool dc_link_set_backlight_level(const struct dc_link *link, (abm->funcs->set_backlight_level_pwm == NULL)) return false; - if (stream) - ((struct dc_stream_state *)stream)->bl_pwm_level = - backlight_pwm_u16_16; - use_smooth_brightness = dmcu->funcs->is_dmcu_initialized(dmcu); DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", @@ -2637,11 +2632,6 @@ void core_link_enable_stream( if (dc_is_dp_signal(pipe_ctx->stream->signal)) enable_stream_features(pipe_ctx); - - dc_link_set_backlight_level(pipe_ctx->stream->sink->link, - pipe_ctx->stream->bl_pwm_level, - 0, - pipe_ctx->stream); } } diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 29f19d57ff7a..b2243e0dad1f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -146,8 +146,7 @@ static inline struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_ */ bool dc_link_set_backlight_level(const struct dc_link *dc_link, uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp, - const struct dc_stream_state *stream); + uint32_t frame_ramp); int dc_link_get_backlight_level(const struct dc_link *dc_link); diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index be34d638e15d..d70c9e1cda3d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -91,7 +91,6 @@ struct dc_stream_state { /* DMCU info */ unsigned int abm_level; - unsigned int bl_pwm_level; /* from core_stream struct */ struct dc_context *ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 4bf24758217f..8f09b8625c5d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1000,7 +1000,7 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx) pipe_ctx->stream_res.audio->funcs->az_enable(pipe_ctx->stream_res.audio); - if (num_audio == 1 && pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL) + if (num_audio >= 1 && pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL) /*this is the first audio. apply the PME w/a in order to wake AZ from D3*/ pp_smu->set_pme_wa_enable(&pp_smu->pp_smu); /* un-mute audio */ @@ -1017,6 +1017,8 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option) pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( pipe_ctx->stream_res.stream_enc, true); if (pipe_ctx->stream_res.audio) { + struct pp_smu_funcs_rv *pp_smu = dc->res_pool->pp_smu; + if (option != KEEP_ACQUIRED_RESOURCE || !dc->debug.az_endpoint_mute_only) { /*only disalbe az_endpoint if power down or free*/ @@ -1036,6 +1038,9 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option) update_audio_usage(&dc->current_state->res_ctx, dc->res_pool, pipe_ctx->stream_res.audio, false); pipe_ctx->stream_res.audio = NULL; } + if (pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL) + /*this is the first audio. apply the PME w/a in order to wake AZ from D3*/ + pp_smu->set_pme_wa_enable(&pp_smu->pp_smu); /* TODO: notify audio driver for if audio modes list changed * add audio mode list change flag */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index dcb3c5530236..cd1ebe57ed59 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -463,7 +463,7 @@ void dpp1_set_cursor_position( if (src_y_offset >= (int)param->viewport.height) cur_en = 0; /* not visible beyond bottom edge*/ - if (src_y_offset < 0) + if (src_y_offset + (int)height <= 0) cur_en = 0; /* not visible beyond top edge*/ REG_UPDATE(CURSOR0_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 345af015d061..d1acd7165bc8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1140,7 +1140,7 @@ void hubp1_cursor_set_position( if (src_y_offset >= (int)param->viewport.height) cur_en = 0; /* not visible beyond bottom edge*/ - if (src_y_offset < 0) //+ (int)hubp->curs_attr.height + if (src_y_offset + (int)hubp->curs_attr.height <= 0) cur_en = 0; /* not visible beyond top edge*/ if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 91e015e14355..58a12ddf12f3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2355,30 +2355,23 @@ static void dcn10_apply_ctx_for_surface( top_pipe_to_program->plane_state->update_flags.bits.full_update) for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - + tg = pipe_ctx->stream_res.tg; /* Skip inactive pipes and ones already updated */ if (!pipe_ctx->stream || pipe_ctx->stream == stream - || !pipe_ctx->plane_state) + || !pipe_ctx->plane_state + || !tg->funcs->is_tg_enabled(tg)) continue; - pipe_ctx->stream_res.tg->funcs->lock(pipe_ctx->stream_res.tg); + tg->funcs->lock(tg); pipe_ctx->plane_res.hubp->funcs->hubp_setup_interdependent( pipe_ctx->plane_res.hubp, &pipe_ctx->dlg_regs, &pipe_ctx->ttu_regs); + + tg->funcs->unlock(tg); } - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (!pipe_ctx->stream || pipe_ctx->stream == stream - || !pipe_ctx->plane_state) - continue; - - dcn10_pipe_control_lock(dc, pipe_ctx, false); - } - if (num_planes == 0) false_optc_underflow_wa(dc, stream, tg); diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 00f63b7dd32f..c11a443dcbc8 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -57,6 +57,7 @@ static const unsigned char abm_config[abm_defines_max_config][abm_defines_max_le #define NUM_POWER_FN_SEGS 8 #define NUM_BL_CURVE_SEGS 16 +#pragma pack(push, 1) /* NOTE: iRAM is 256B in size */ struct iram_table_v_2 { /* flags */ @@ -100,6 +101,7 @@ struct iram_table_v_2 { uint8_t dummy8; /* 0xfe */ uint8_t dummy9; /* 0xff */ }; +#pragma pack(pop) static uint16_t backlight_8_to_16(unsigned int backlight_8bit) { diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index b8747a5c9204..99d596dc0e89 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -32,6 +32,7 @@ #include "vega10_pptable.h" #define NUM_DSPCLK_LEVELS 8 +#define VEGA10_ENGINECLOCK_HARDMAX 198000 static void set_hw_cap(struct pp_hwmgr *hwmgr, bool enable, enum phm_platform_caps cap) @@ -258,7 +259,26 @@ static int init_over_drive_limits( struct pp_hwmgr *hwmgr, const ATOM_Vega10_POWERPLAYTABLE *powerplay_table) { - hwmgr->platform_descriptor.overdriveLimit.engineClock = + const ATOM_Vega10_GFXCLK_Dependency_Table *gfxclk_dep_table = + (const ATOM_Vega10_GFXCLK_Dependency_Table *) + (((unsigned long) powerplay_table) + + le16_to_cpu(powerplay_table->usGfxclkDependencyTableOffset)); + bool is_acg_enabled = false; + ATOM_Vega10_GFXCLK_Dependency_Record_V2 *patom_record_v2; + + if (gfxclk_dep_table->ucRevId == 1) { + patom_record_v2 = + (ATOM_Vega10_GFXCLK_Dependency_Record_V2 *)gfxclk_dep_table->entries; + is_acg_enabled = + (bool)patom_record_v2[gfxclk_dep_table->ucNumEntries-1].ucACGEnable; + } + + if (powerplay_table->ulMaxODEngineClock > VEGA10_ENGINECLOCK_HARDMAX && + !is_acg_enabled) + hwmgr->platform_descriptor.overdriveLimit.engineClock = + VEGA10_ENGINECLOCK_HARDMAX; + else + hwmgr->platform_descriptor.overdriveLimit.engineClock = le32_to_cpu(powerplay_table->ulMaxODEngineClock); hwmgr->platform_descriptor.overdriveLimit.memoryClock = le32_to_cpu(powerplay_table->ulMaxODMemoryClock); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 54364444ecd1..0c8212902275 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -753,6 +753,22 @@ static int vega12_init_smc_table(struct pp_hwmgr *hwmgr) return 0; } +static int vega12_run_acg_btc(struct pp_hwmgr *hwmgr) +{ + uint32_t result; + + PP_ASSERT_WITH_CODE( + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAcgBtc) == 0, + "[Run_ACG_BTC] Attempt to run ACG BTC failed!", + return -EINVAL); + + result = smum_get_argument(hwmgr); + PP_ASSERT_WITH_CODE(result == 1, + "Failed to run ACG BTC!", return -EINVAL); + + return 0; +} + static int vega12_set_allowed_featuresmask(struct pp_hwmgr *hwmgr) { struct vega12_hwmgr *data = @@ -931,6 +947,11 @@ static int vega12_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "Failed to initialize SMC table!", result = tmp_result); + tmp_result = vega12_run_acg_btc(hwmgr); + PP_ASSERT_WITH_CODE(!tmp_result, + "Failed to run ACG BTC!", + result = tmp_result); + result = vega12_enable_all_smu_features(hwmgr); PP_ASSERT_WITH_CODE(!result, "Failed to enable all smu features!", diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index b5475c91e2ef..e9f343b124b0 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2799,6 +2799,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(_MMIO(0xe2a0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0xe2b0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0xe2c0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x21f0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 5af11cf1b482..e1675a00df12 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -41,7 +41,7 @@ struct intel_gvt_mpt { int (*host_init)(struct device *dev, void *gvt, const void *ops); void (*host_exit)(struct device *dev, void *gvt); int (*attach_vgpu)(void *vgpu, unsigned long *handle); - void (*detach_vgpu)(unsigned long handle); + void (*detach_vgpu)(void *vgpu); int (*inject_msi)(unsigned long handle, u32 addr, u16 data); unsigned long (*from_virt_to_mfn)(void *p); int (*enable_page_track)(unsigned long handle, u64 gfn); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index c1072143da1d..dd3dfd00f4e6 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -996,7 +996,7 @@ static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) { unsigned int index; u64 virtaddr; - unsigned long req_size, pgoff = 0; + unsigned long req_size, pgoff, req_start; pgprot_t pg_prot; struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); @@ -1014,7 +1014,17 @@ static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) pg_prot = vma->vm_page_prot; virtaddr = vma->vm_start; req_size = vma->vm_end - vma->vm_start; - pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT; + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + req_start = pgoff << PAGE_SHIFT; + + if (!intel_vgpu_in_aperture(vgpu, req_start)) + return -EINVAL; + if (req_start + req_size > + vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu)) + return -EINVAL; + + pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff; return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot); } @@ -1662,9 +1672,21 @@ static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle) return 0; } -static void kvmgt_detach_vgpu(unsigned long handle) +static void kvmgt_detach_vgpu(void *p_vgpu) { - /* nothing to do here */ + int i; + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + + if (!vgpu->vdev.region) + return; + + for (i = 0; i < vgpu->vdev.num_regions; i++) + if (vgpu->vdev.region[i].ops->release) + vgpu->vdev.region[i].ops->release(vgpu, + &vgpu->vdev.region[i]); + vgpu->vdev.num_regions = 0; + kfree(vgpu->vdev.region); + vgpu->vdev.region = NULL; } static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data) diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 67f19992b226..3ed34123d8d1 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -101,7 +101,7 @@ static inline void intel_gvt_hypervisor_detach_vgpu(struct intel_vgpu *vgpu) if (!intel_gvt_host.mpt->detach_vgpu) return; - intel_gvt_host.mpt->detach_vgpu(vgpu->handle); + intel_gvt_host.mpt->detach_vgpu(vgpu); } #define MSI_CAP_CONTROL(offset) (offset + 2) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 5567ddc7760f..55bb7885e228 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -332,6 +332,9 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj); i915_gem_object_put(wa_ctx->indirect_ctx.obj); + + wa_ctx->indirect_ctx.obj = NULL; + wa_ctx->indirect_ctx.shadow_va = NULL; } static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, @@ -911,11 +914,6 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) list_del_init(&workload->list); - if (!workload->status) { - release_shadow_batch_buffer(workload); - release_shadow_wa_ctx(&workload->wa_ctx); - } - if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) { /* if workload->status is not successful means HW GPU * has occurred GPU hang or something wrong with i915/GVT, @@ -1283,6 +1281,9 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu_submission *s = &workload->vgpu->submission; + release_shadow_batch_buffer(workload); + release_shadow_wa_ctx(&workload->wa_ctx); + if (workload->shadow_mm) intel_vgpu_mm_put(workload->shadow_mm); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 4796f40a6d4f..eab9341a5152 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -303,6 +303,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) */ if (!(prio & I915_PRIORITY_NEWCLIENT)) { prio |= I915_PRIORITY_NEWCLIENT; + active->sched.attr.priority = prio; list_move_tail(&active->sched.link, i915_sched_lookup_priolist(engine, prio)); } @@ -645,6 +646,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine) int i; priolist_for_each_request_consume(rq, rn, p, i) { + GEM_BUG_ON(last && + need_preempt(engine, last, rq_prio(rq))); + /* * Can we combine this request with the current port? * It has to be the same context/ringbuffer and not diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c index 75d97f1b2e8f..4f5c67f70c4d 100644 --- a/drivers/gpu/drm/meson/meson_crtc.c +++ b/drivers/gpu/drm/meson/meson_crtc.c @@ -46,7 +46,6 @@ struct meson_crtc { struct drm_crtc base; struct drm_pending_vblank_event *event; struct meson_drm *priv; - bool enabled; }; #define to_meson_crtc(x) container_of(x, struct meson_crtc, base) @@ -82,7 +81,8 @@ static const struct drm_crtc_funcs meson_crtc_funcs = { }; -static void meson_crtc_enable(struct drm_crtc *crtc) +static void meson_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) { struct meson_crtc *meson_crtc = to_meson_crtc(crtc); struct drm_crtc_state *crtc_state = crtc->state; @@ -108,20 +108,6 @@ static void meson_crtc_enable(struct drm_crtc *crtc) drm_crtc_vblank_on(crtc); - meson_crtc->enabled = true; -} - -static void meson_crtc_atomic_enable(struct drm_crtc *crtc, - struct drm_crtc_state *old_state) -{ - struct meson_crtc *meson_crtc = to_meson_crtc(crtc); - struct meson_drm *priv = meson_crtc->priv; - - DRM_DEBUG_DRIVER("\n"); - - if (!meson_crtc->enabled) - meson_crtc_enable(crtc); - priv->viu.osd1_enabled = true; } @@ -153,8 +139,6 @@ static void meson_crtc_atomic_disable(struct drm_crtc *crtc, crtc->state->event = NULL; } - - meson_crtc->enabled = false; } static void meson_crtc_atomic_begin(struct drm_crtc *crtc, @@ -163,9 +147,6 @@ static void meson_crtc_atomic_begin(struct drm_crtc *crtc, struct meson_crtc *meson_crtc = to_meson_crtc(crtc); unsigned long flags; - if (crtc->state->enable && !meson_crtc->enabled) - meson_crtc_enable(crtc); - if (crtc->state->event) { WARN_ON(drm_crtc_vblank_get(crtc) != 0); diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 3ee4d4a4ecba..12ff47b13668 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -75,6 +75,10 @@ static const struct drm_mode_config_funcs meson_mode_config_funcs = { .fb_create = drm_gem_fb_create, }; +static const struct drm_mode_config_helper_funcs meson_mode_config_helpers = { + .atomic_commit_tail = drm_atomic_helper_commit_tail_rpm, +}; + static irqreturn_t meson_irq(int irq, void *arg) { struct drm_device *dev = arg; @@ -266,6 +270,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) drm->mode_config.max_width = 3840; drm->mode_config.max_height = 2160; drm->mode_config.funcs = &meson_mode_config_funcs; + drm->mode_config.helper_private = &meson_mode_config_helpers; /* Hardware Initialization */ @@ -388,8 +393,10 @@ static int meson_probe_remote(struct platform_device *pdev, remote_node = of_graph_get_remote_port_parent(ep); if (!remote_node || remote_node == parent || /* Ignore parent endpoint */ - !of_device_is_available(remote_node)) + !of_device_is_available(remote_node)) { + of_node_put(remote_node); continue; + } count += meson_probe_remote(pdev, match, remote, remote_node); @@ -408,10 +415,13 @@ static int meson_drv_probe(struct platform_device *pdev) for_each_endpoint_of_node(np, ep) { remote = of_graph_get_remote_port_parent(ep); - if (!remote || !of_device_is_available(remote)) + if (!remote || !of_device_is_available(remote)) { + of_node_put(remote); continue; + } count += meson_probe_remote(pdev, &match, np, remote); + of_node_put(remote); } if (count && !match) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 5beb83d1cf87..ce1b3cc4bf6d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -944,7 +944,7 @@ static u32 a6xx_gmu_get_arc_level(struct device *dev, unsigned long freq) np = dev_pm_opp_get_of_node(opp); if (np) { - of_property_read_u32(np, "qcom,level", &val); + of_property_read_u32(np, "opp-level", &val); of_node_put(np); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 2e4372ef17a3..2cfee1a4fe0b 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -765,7 +765,6 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, adreno_gpu->rev = config->rev; adreno_gpu_config.ioname = "kgsl_3d0_reg_memory"; - adreno_gpu_config.irqname = "kgsl_3d0_irq"; adreno_gpu_config.va_start = SZ_16M; adreno_gpu_config.va_end = 0xffffffff; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index fd75870eb17f..6aefcd6db46b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -365,19 +365,6 @@ static void _dpu_plane_set_qos_ctrl(struct drm_plane *plane, &pdpu->pipe_qos_cfg); } -static void dpu_plane_danger_signal_ctrl(struct drm_plane *plane, bool enable) -{ - struct dpu_plane *pdpu = to_dpu_plane(plane); - struct dpu_kms *dpu_kms = _dpu_plane_get_kms(plane); - - if (!pdpu->is_rt_pipe) - return; - - pm_runtime_get_sync(&dpu_kms->pdev->dev); - _dpu_plane_set_qos_ctrl(plane, enable, DPU_PLANE_QOS_PANIC_CTRL); - pm_runtime_put_sync(&dpu_kms->pdev->dev); -} - /** * _dpu_plane_set_ot_limit - set OT limit for the given plane * @plane: Pointer to drm plane @@ -1248,6 +1235,19 @@ static void dpu_plane_reset(struct drm_plane *plane) } #ifdef CONFIG_DEBUG_FS +static void dpu_plane_danger_signal_ctrl(struct drm_plane *plane, bool enable) +{ + struct dpu_plane *pdpu = to_dpu_plane(plane); + struct dpu_kms *dpu_kms = _dpu_plane_get_kms(plane); + + if (!pdpu->is_rt_pipe) + return; + + pm_runtime_get_sync(&dpu_kms->pdev->dev); + _dpu_plane_set_qos_ctrl(plane, enable, DPU_PLANE_QOS_PANIC_CTRL); + pm_runtime_put_sync(&dpu_kms->pdev->dev); +} + static ssize_t _dpu_plane_danger_read(struct file *file, char __user *buff, size_t count, loff_t *ppos) { diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 9cd6a96c6bf2..927e5d86f7c1 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -250,7 +250,8 @@ void msm_gem_purge_vma(struct msm_gem_address_space *aspace, void msm_gem_unmap_vma(struct msm_gem_address_space *aspace, struct msm_gem_vma *vma); int msm_gem_map_vma(struct msm_gem_address_space *aspace, - struct msm_gem_vma *vma, struct sg_table *sgt, int npages); + struct msm_gem_vma *vma, int prot, + struct sg_table *sgt, int npages); void msm_gem_close_vma(struct msm_gem_address_space *aspace, struct msm_gem_vma *vma); @@ -333,6 +334,7 @@ void msm_gem_kernel_put(struct drm_gem_object *bo, struct drm_gem_object *msm_gem_import(struct drm_device *dev, struct dma_buf *dmabuf, struct sg_table *sgt); +__printf(2, 3) void msm_gem_object_set_name(struct drm_gem_object *bo, const char *fmt, ...); int msm_framebuffer_prepare(struct drm_framebuffer *fb, @@ -396,12 +398,14 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m); int msm_debugfs_late_init(struct drm_device *dev); int msm_rd_debugfs_init(struct drm_minor *minor); void msm_rd_debugfs_cleanup(struct msm_drm_private *priv); +__printf(3, 4) void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, const char *fmt, ...); int msm_perf_debugfs_init(struct drm_minor *minor); void msm_perf_debugfs_cleanup(struct msm_drm_private *priv); #else static inline int msm_debugfs_late_init(struct drm_device *dev) { return 0; } +__printf(3, 4) static inline void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, const char *fmt, ...) {} static inline void msm_rd_debugfs_cleanup(struct msm_drm_private *priv) {} diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 51a95da694d8..c8886d3071fa 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -391,6 +391,10 @@ static int msm_gem_pin_iova(struct drm_gem_object *obj, struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma; struct page **pages; + int prot = IOMMU_READ; + + if (!(msm_obj->flags & MSM_BO_GPU_READONLY)) + prot |= IOMMU_WRITE; WARN_ON(!mutex_is_locked(&msm_obj->lock)); @@ -405,8 +409,8 @@ static int msm_gem_pin_iova(struct drm_gem_object *obj, if (IS_ERR(pages)) return PTR_ERR(pages); - return msm_gem_map_vma(aspace, vma, msm_obj->sgt, - obj->size >> PAGE_SHIFT); + return msm_gem_map_vma(aspace, vma, prot, + msm_obj->sgt, obj->size >> PAGE_SHIFT); } /* get iova and pin it. Should have a matching put */ diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 557360788084..49c04829cf34 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -68,7 +68,8 @@ void msm_gem_unmap_vma(struct msm_gem_address_space *aspace, int msm_gem_map_vma(struct msm_gem_address_space *aspace, - struct msm_gem_vma *vma, struct sg_table *sgt, int npages) + struct msm_gem_vma *vma, int prot, + struct sg_table *sgt, int npages) { unsigned size = npages << PAGE_SHIFT; int ret = 0; @@ -86,7 +87,7 @@ msm_gem_map_vma(struct msm_gem_address_space *aspace, if (aspace->mmu) ret = aspace->mmu->funcs->map(aspace->mmu, vma->iova, sgt, - size, IOMMU_READ | IOMMU_WRITE); + size, prot); if (ret) vma->mapped = false; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 5f3eff304355..10babd18e286 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -900,7 +900,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, } /* Get Interrupt: */ - gpu->irq = platform_get_irq_byname(pdev, config->irqname); + gpu->irq = platform_get_irq(pdev, 0); if (gpu->irq < 0) { ret = gpu->irq; DRM_DEV_ERROR(drm->dev, "failed to get irq: %d\n", ret); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index efb49bb64191..ca17086f72c9 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -31,7 +31,6 @@ struct msm_gpu_state; struct msm_gpu_config { const char *ioname; - const char *irqname; uint64_t va_start; uint64_t va_end; unsigned int nr_rings; @@ -63,7 +62,7 @@ struct msm_gpu_funcs { struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu); void (*recover)(struct msm_gpu *gpu); void (*destroy)(struct msm_gpu *gpu); -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) /* show GPU status in debugfs: */ void (*show)(struct msm_gpu *gpu, struct msm_gpu_state *state, struct drm_printer *p); diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 90e9d0a48dc0..d21172933d92 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -115,7 +115,9 @@ static void rd_write(struct msm_rd_state *rd, const void *buf, int sz) char *fptr = &fifo->buf[fifo->head]; int n; - wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0); + wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0 || !rd->open); + if (!rd->open) + return; /* Note that smp_load_acquire() is not strictly required * as CIRC_SPACE_TO_END() does not access the tail more @@ -213,7 +215,10 @@ out: static int rd_release(struct inode *inode, struct file *file) { struct msm_rd_state *rd = inode->i_private; + rd->open = false; + wake_up_all(&rd->fifo_event); + return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index bfbc9341e0c2..d9edb5785813 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2434,6 +2434,38 @@ nv140_chipset = { .sec2 = gp102_sec2_new, }; +static const struct nvkm_device_chip +nv162_chipset = { + .name = "TU102", + .bar = tu104_bar_new, + .bios = nvkm_bios_new, + .bus = gf100_bus_new, + .devinit = tu104_devinit_new, + .fault = tu104_fault_new, + .fb = gv100_fb_new, + .fuse = gm107_fuse_new, + .gpio = gk104_gpio_new, + .i2c = gm200_i2c_new, + .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, + .ltc = gp102_ltc_new, + .mc = tu104_mc_new, + .mmu = tu104_mmu_new, + .pci = gp100_pci_new, + .pmu = gp102_pmu_new, + .therm = gp100_therm_new, + .timer = gk20a_timer_new, + .top = gk104_top_new, + .ce[0] = tu104_ce_new, + .ce[1] = tu104_ce_new, + .ce[2] = tu104_ce_new, + .ce[3] = tu104_ce_new, + .ce[4] = tu104_ce_new, + .disp = tu104_disp_new, + .dma = gv100_dma_new, + .fifo = tu104_fifo_new, +}; + static const struct nvkm_device_chip nv164_chipset = { .name = "TU104", @@ -2950,6 +2982,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x138: device->chip = &nv138_chipset; break; case 0x13b: device->chip = &nv13b_chipset; break; case 0x140: device->chip = &nv140_chipset; break; + case 0x162: device->chip = &nv162_chipset; break; case 0x164: device->chip = &nv164_chipset; break; case 0x166: device->chip = &nv166_chipset; break; default: diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 13c8a662f9b4..ccb090f3ab30 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -250,14 +250,10 @@ static struct drm_driver qxl_driver = { #if defined(CONFIG_DEBUG_FS) .debugfs_init = qxl_debugfs_init, #endif - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = drm_gem_prime_export, .gem_prime_import = drm_gem_prime_import, .gem_prime_pin = qxl_gem_prime_pin, .gem_prime_unpin = qxl_gem_prime_unpin, - .gem_prime_get_sg_table = qxl_gem_prime_get_sg_table, - .gem_prime_import_sg_table = qxl_gem_prime_import_sg_table, .gem_prime_vmap = qxl_gem_prime_vmap, .gem_prime_vunmap = qxl_gem_prime_vunmap, .gem_prime_mmap = qxl_gem_prime_mmap, diff --git a/drivers/gpu/drm/qxl/qxl_prime.c b/drivers/gpu/drm/qxl/qxl_prime.c index a55dece118b2..df65d3c1a7b8 100644 --- a/drivers/gpu/drm/qxl/qxl_prime.c +++ b/drivers/gpu/drm/qxl/qxl_prime.c @@ -38,20 +38,6 @@ void qxl_gem_prime_unpin(struct drm_gem_object *obj) WARN_ONCE(1, "not implemented"); } -struct sg_table *qxl_gem_prime_get_sg_table(struct drm_gem_object *obj) -{ - WARN_ONCE(1, "not implemented"); - return ERR_PTR(-ENOSYS); -} - -struct drm_gem_object *qxl_gem_prime_import_sg_table( - struct drm_device *dev, struct dma_buf_attachment *attach, - struct sg_table *table) -{ - WARN_ONCE(1, "not implemented"); - return ERR_PTR(-ENOSYS); -} - void *qxl_gem_prime_vmap(struct drm_gem_object *obj) { WARN_ONCE(1, "not implemented"); diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.c b/drivers/gpu/drm/rockchip/rockchip_rgb.c index 96ac1458a59c..37f93022a106 100644 --- a/drivers/gpu/drm/rockchip/rockchip_rgb.c +++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c @@ -113,8 +113,10 @@ struct rockchip_rgb *rockchip_rgb_init(struct device *dev, child_count++; ret = drm_of_find_panel_or_bridge(dev->of_node, 0, endpoint_id, &panel, &bridge); - if (!ret) + if (!ret) { + of_node_put(endpoint); break; + } } of_node_put(port); diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index 9e9255ee59cd..a021bab11a4f 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -786,17 +786,18 @@ static struct sun4i_frontend *sun4i_backend_find_frontend(struct sun4i_drv *drv, remote = of_graph_get_remote_port_parent(ep); if (!remote) continue; + of_node_put(remote); /* does this node match any registered engines? */ list_for_each_entry(frontend, &drv->frontend_list, list) { if (remote == frontend->node) { - of_node_put(remote); of_node_put(port); + of_node_put(ep); return frontend; } } } - + of_node_put(port); return ERR_PTR(-EINVAL); } diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index 061d2e0d9011..416da5376701 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -92,6 +92,8 @@ static void sun4i_hdmi_disable(struct drm_encoder *encoder) val = readl(hdmi->base + SUN4I_HDMI_VID_CTRL_REG); val &= ~SUN4I_HDMI_VID_CTRL_ENABLE; writel(val, hdmi->base + SUN4I_HDMI_VID_CTRL_REG); + + clk_disable_unprepare(hdmi->tmds_clk); } static void sun4i_hdmi_enable(struct drm_encoder *encoder) @@ -102,6 +104,8 @@ static void sun4i_hdmi_enable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Enabling the HDMI Output\n"); + clk_prepare_enable(hdmi->tmds_clk); + sun4i_hdmi_setup_avi_infoframes(hdmi, mode); val |= SUN4I_HDMI_PKT_CTRL_TYPE(0, SUN4I_HDMI_PKT_AVI); val |= SUN4I_HDMI_PKT_CTRL_TYPE(1, SUN4I_HDMI_PKT_END); diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index f7f32a885af7..2d1aaca49105 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -127,14 +127,10 @@ static struct drm_driver driver = { #if defined(CONFIG_DEBUG_FS) .debugfs_init = virtio_gpu_debugfs_init, #endif - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = drm_gem_prime_export, .gem_prime_import = drm_gem_prime_import, .gem_prime_pin = virtgpu_gem_prime_pin, .gem_prime_unpin = virtgpu_gem_prime_unpin, - .gem_prime_get_sg_table = virtgpu_gem_prime_get_sg_table, - .gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table, .gem_prime_vmap = virtgpu_gem_prime_vmap, .gem_prime_vunmap = virtgpu_gem_prime_vunmap, .gem_prime_mmap = virtgpu_gem_prime_mmap, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 1deb41d42ea4..0c15000f926e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -372,10 +372,6 @@ int virtio_gpu_object_wait(struct virtio_gpu_object *bo, bool no_wait); /* virtgpu_prime.c */ int virtgpu_gem_prime_pin(struct drm_gem_object *obj); void virtgpu_gem_prime_unpin(struct drm_gem_object *obj); -struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); -struct drm_gem_object *virtgpu_gem_prime_import_sg_table( - struct drm_device *dev, struct dma_buf_attachment *attach, - struct sg_table *sgt); void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj); void virtgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int virtgpu_gem_prime_mmap(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index 86ce0ae93f59..c59ec34c80a5 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@ -39,20 +39,6 @@ void virtgpu_gem_prime_unpin(struct drm_gem_object *obj) WARN_ONCE(1, "not implemented"); } -struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) -{ - WARN_ONCE(1, "not implemented"); - return ERR_PTR(-ENODEV); -} - -struct drm_gem_object *virtgpu_gem_prime_import_sg_table( - struct drm_device *dev, struct dma_buf_attachment *attach, - struct sg_table *table) -{ - WARN_ONCE(1, "not implemented"); - return ERR_PTR(-ENODEV); -} - void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj) { struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj); diff --git a/drivers/gpu/vga/Kconfig b/drivers/gpu/vga/Kconfig index b677e5d524e6..d5f1d8e1c6f8 100644 --- a/drivers/gpu/vga/Kconfig +++ b/drivers/gpu/vga/Kconfig @@ -21,6 +21,7 @@ config VGA_SWITCHEROO bool "Laptop Hybrid Graphics - GPU switching support" depends on X86 depends on ACPI + depends on PCI select VGA_ARB help Many laptops released in 2008/9/10 have two GPUs with a multiplexer diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index f41d5fe51abe..9993b692598f 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -125,6 +125,7 @@ static int open_collection(struct hid_parser *parser, unsigned type) { struct hid_collection *collection; unsigned usage; + int collection_index; usage = parser->local.usage[0]; @@ -167,13 +168,13 @@ static int open_collection(struct hid_parser *parser, unsigned type) parser->collection_stack[parser->collection_stack_ptr++] = parser->device->maxcollection; - collection = parser->device->collection + - parser->device->maxcollection++; + collection_index = parser->device->maxcollection++; + collection = parser->device->collection + collection_index; collection->type = type; collection->usage = usage; collection->level = parser->collection_stack_ptr - 1; - collection->parent = parser->active_collection; - parser->active_collection = collection; + collection->parent_idx = (collection->level == 0) ? -1 : + parser->collection_stack[collection->level - 1]; if (type == HID_COLLECTION_APPLICATION) parser->device->maxapplication++; @@ -192,8 +193,6 @@ static int close_collection(struct hid_parser *parser) return -EINVAL; } parser->collection_stack_ptr--; - if (parser->active_collection) - parser->active_collection = parser->active_collection->parent; return 0; } @@ -1006,10 +1005,12 @@ static void hid_apply_multiplier_to_field(struct hid_device *hid, usage = &field->usage[i]; collection = &hid->collection[usage->collection_index]; - while (collection && collection != multiplier_collection) - collection = collection->parent; + while (collection->parent_idx != -1 && + collection != multiplier_collection) + collection = &hid->collection[collection->parent_idx]; - if (collection || multiplier_collection == NULL) + if (collection->parent_idx != -1 || + multiplier_collection == NULL) usage->resolution_multiplier = effective_multiplier; } @@ -1044,9 +1045,9 @@ static void hid_apply_multiplier(struct hid_device *hid, * applicable fields later. */ multiplier_collection = &hid->collection[multiplier->usage->collection_index]; - while (multiplier_collection && + while (multiplier_collection->parent_idx != -1 && multiplier_collection->type != HID_COLLECTION_LOGICAL) - multiplier_collection = multiplier_collection->parent; + multiplier_collection = &hid->collection[multiplier_collection->parent_idx]; effective_multiplier = hid_calculate_multiplier(hid, multiplier); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 518fa76414f5..24f846d67478 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -461,6 +461,9 @@ #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_010A 0x010a #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100 +#define I2C_VENDOR_ID_GOODIX 0x27c6 +#define I2C_DEVICE_ID_GOODIX_01F0 0x01f0 + #define USB_VENDOR_ID_GOODTOUCH 0x1aad #define USB_DEVICE_ID_GOODTOUCH_000f 0x000f diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 8555ce7e737b..c5edfa966343 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -179,6 +179,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_DELAY_AFTER_SLEEP }, { USB_VENDOR_ID_LG, I2C_DEVICE_ID_LG_8001, I2C_HID_QUIRK_NO_RUNTIME_PM }, + { I2C_VENDOR_ID_GOODIX, I2C_DEVICE_ID_GOODIX_01F0, + I2C_HID_QUIRK_NO_RUNTIME_PM }, { 0, 0 } }; diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index ce0ba2062723..bea4c9850247 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -701,19 +701,12 @@ static int vmbus_close_internal(struct vmbus_channel *channel) int vmbus_disconnect_ring(struct vmbus_channel *channel) { struct vmbus_channel *cur_channel, *tmp; - unsigned long flags; - LIST_HEAD(list); int ret; if (channel->primary_channel != NULL) return -EINVAL; - /* Snapshot the list of subchannels */ - spin_lock_irqsave(&channel->lock, flags); - list_splice_init(&channel->sc_list, &list); - spin_unlock_irqrestore(&channel->lock, flags); - - list_for_each_entry_safe(cur_channel, tmp, &list, sc_list) { + list_for_each_entry_safe(cur_channel, tmp, &channel->sc_list, sc_list) { if (cur_channel->rescind) wait_for_completion(&cur_channel->rescind_event); diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 5301fef16c31..7c6349a50ef1 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -888,12 +888,14 @@ static unsigned long handle_pg_range(unsigned long pg_start, pfn_cnt -= pgs_ol; /* * Check if the corresponding memory block is already - * online by checking its last previously backed page. - * In case it is we need to bring rest (which was not - * backed previously) online too. + * online. It is possible to observe struct pages still + * being uninitialized here so check section instead. + * In case the section is online we need to bring the + * rest of pfns (which were not backed previously) + * online too. */ if (start_pfn > has->start_pfn && - !PageReserved(pfn_to_page(start_pfn - 1))) + online_section_nr(pfn_to_section_nr(start_pfn))) hv_bring_pgs_online(has, start_pfn, pgs_ol); } diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 64d0c85d5161..1f1a55e07733 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -164,26 +164,25 @@ hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi, } /* Get various debug metrics for the specified ring buffer. */ -void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, - struct hv_ring_buffer_debug_info *debug_info) +int hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info) { u32 bytes_avail_towrite; u32 bytes_avail_toread; - if (ring_info->ring_buffer) { - hv_get_ringbuffer_availbytes(ring_info, - &bytes_avail_toread, - &bytes_avail_towrite); + if (!ring_info->ring_buffer) + return -EINVAL; - debug_info->bytes_avail_toread = bytes_avail_toread; - debug_info->bytes_avail_towrite = bytes_avail_towrite; - debug_info->current_read_index = - ring_info->ring_buffer->read_index; - debug_info->current_write_index = - ring_info->ring_buffer->write_index; - debug_info->current_interrupt_mask = - ring_info->ring_buffer->interrupt_mask; - } + hv_get_ringbuffer_availbytes(ring_info, + &bytes_avail_toread, + &bytes_avail_towrite); + debug_info->bytes_avail_toread = bytes_avail_toread; + debug_info->bytes_avail_towrite = bytes_avail_towrite; + debug_info->current_read_index = ring_info->ring_buffer->read_index; + debug_info->current_write_index = ring_info->ring_buffer->write_index; + debug_info->current_interrupt_mask + = ring_info->ring_buffer->interrupt_mask; + return 0; } EXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index d0ff65675292..403fee01572c 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -313,12 +313,16 @@ static ssize_t out_intr_mask_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", outbound.current_interrupt_mask); } static DEVICE_ATTR_RO(out_intr_mask); @@ -328,12 +332,15 @@ static ssize_t out_read_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.current_read_index); } static DEVICE_ATTR_RO(out_read_index); @@ -344,12 +351,15 @@ static ssize_t out_write_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.current_write_index); } static DEVICE_ATTR_RO(out_write_index); @@ -360,12 +370,15 @@ static ssize_t out_read_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.bytes_avail_toread); } static DEVICE_ATTR_RO(out_read_bytes_avail); @@ -376,12 +389,15 @@ static ssize_t out_write_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.bytes_avail_towrite); } static DEVICE_ATTR_RO(out_write_bytes_avail); @@ -391,12 +407,15 @@ static ssize_t in_intr_mask_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.current_interrupt_mask); } static DEVICE_ATTR_RO(in_intr_mask); @@ -406,12 +425,15 @@ static ssize_t in_read_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.current_read_index); } static DEVICE_ATTR_RO(in_read_index); @@ -421,12 +443,15 @@ static ssize_t in_write_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.current_write_index); } static DEVICE_ATTR_RO(in_write_index); @@ -437,12 +462,15 @@ static ssize_t in_read_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.bytes_avail_toread); } static DEVICE_ATTR_RO(in_read_bytes_avail); @@ -453,12 +481,15 @@ static ssize_t in_write_bytes_avail_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info inbound; + int ret; if (!hv_dev->channel) return -ENODEV; - if (hv_dev->channel->state != CHANNEL_OPENED_STATE) - return -EINVAL; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", inbound.bytes_avail_towrite); } static DEVICE_ATTR_RO(in_write_bytes_avail); diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c index 0e30fa00204c..f9b8e3e23a8e 100644 --- a/drivers/hwmon/lm80.c +++ b/drivers/hwmon/lm80.c @@ -393,8 +393,10 @@ static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr, } rv = lm80_read_value(client, LM80_REG_FANDIV); - if (rv < 0) + if (rv < 0) { + mutex_unlock(&data->update_lock); return rv; + } reg = (rv & ~(3 << (2 * (nr + 1)))) | (data->fan_div[nr] << (2 * (nr + 1))); lm80_write_value(client, LM80_REG_FANDIV, reg); diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index c3040079b1cb..4adec4ab7d06 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -44,8 +44,8 @@ * nct6796d 14 7 7 2+6 0xd420 0xc1 0x5ca3 * nct6797d 14 7 7 2+6 0xd450 0xc1 0x5ca3 * (0xd451) - * nct6798d 14 7 7 2+6 0xd458 0xc1 0x5ca3 - * (0xd459) + * nct6798d 14 7 7 2+6 0xd428 0xc1 0x5ca3 + * (0xd429) * * #temp lists the number of monitored temperature sources (first value) plus * the number of directly connectable temperature sensors (second value). @@ -138,7 +138,7 @@ MODULE_PARM_DESC(fan_debounce, "Enable debouncing for fan RPM signal"); #define SIO_NCT6795_ID 0xd350 #define SIO_NCT6796_ID 0xd420 #define SIO_NCT6797_ID 0xd450 -#define SIO_NCT6798_ID 0xd458 +#define SIO_NCT6798_ID 0xd428 #define SIO_ID_MASK 0xFFF8 enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 }; @@ -4508,7 +4508,8 @@ static int __maybe_unused nct6775_resume(struct device *dev) if (data->kind == nct6791 || data->kind == nct6792 || data->kind == nct6793 || data->kind == nct6795 || - data->kind == nct6796) + data->kind == nct6796 || data->kind == nct6797 || + data->kind == nct6798) nct6791_enable_io_mapping(sioreg); superio_exit(sioreg); @@ -4644,7 +4645,8 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) if (sio_data->kind == nct6791 || sio_data->kind == nct6792 || sio_data->kind == nct6793 || sio_data->kind == nct6795 || - sio_data->kind == nct6796) + sio_data->kind == nct6796 || sio_data->kind == nct6797 || + sio_data->kind == nct6798) nct6791_enable_io_mapping(sioaddr); superio_exit(sioaddr); diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index 423903f87955..391118c8aae8 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -380,8 +380,8 @@ static ssize_t occ_show_power_1(struct device *dev, val *= 1000000ULL; break; case 2: - val = get_unaligned_be32(&power->update_tag) * - occ->powr_sample_time_us; + val = (u64)get_unaligned_be32(&power->update_tag) * + occ->powr_sample_time_us; break; case 3: val = get_unaligned_be16(&power->value) * 1000000ULL; @@ -425,8 +425,8 @@ static ssize_t occ_show_power_2(struct device *dev, &power->update_tag); break; case 2: - val = get_unaligned_be32(&power->update_tag) * - occ->powr_sample_time_us; + val = (u64)get_unaligned_be32(&power->update_tag) * + occ->powr_sample_time_us; break; case 3: val = get_unaligned_be16(&power->value) * 1000000ULL; @@ -463,8 +463,8 @@ static ssize_t occ_show_power_a0(struct device *dev, &power->system.update_tag); break; case 2: - val = get_unaligned_be32(&power->system.update_tag) * - occ->powr_sample_time_us; + val = (u64)get_unaligned_be32(&power->system.update_tag) * + occ->powr_sample_time_us; break; case 3: val = get_unaligned_be16(&power->system.value) * 1000000ULL; @@ -477,8 +477,8 @@ static ssize_t occ_show_power_a0(struct device *dev, &power->proc.update_tag); break; case 6: - val = get_unaligned_be32(&power->proc.update_tag) * - occ->powr_sample_time_us; + val = (u64)get_unaligned_be32(&power->proc.update_tag) * + occ->powr_sample_time_us; break; case 7: val = get_unaligned_be16(&power->proc.value) * 1000000ULL; @@ -491,8 +491,8 @@ static ssize_t occ_show_power_a0(struct device *dev, &power->vdd.update_tag); break; case 10: - val = get_unaligned_be32(&power->vdd.update_tag) * - occ->powr_sample_time_us; + val = (u64)get_unaligned_be32(&power->vdd.update_tag) * + occ->powr_sample_time_us; break; case 11: val = get_unaligned_be16(&power->vdd.value) * 1000000ULL; @@ -505,8 +505,8 @@ static ssize_t occ_show_power_a0(struct device *dev, &power->vdn.update_tag); break; case 14: - val = get_unaligned_be32(&power->vdn.update_tag) * - occ->powr_sample_time_us; + val = (u64)get_unaligned_be32(&power->vdn.update_tag) * + occ->powr_sample_time_us; break; case 15: val = get_unaligned_be16(&power->vdn.value) * 1000000ULL; diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index 8844c9565d2a..7053be59ad2e 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -88,7 +88,7 @@ static const struct of_device_id tmp421_of_match[] = { .data = (void *)2 }, { - .compatible = "ti,tmp422", + .compatible = "ti,tmp442", .data = (void *)3 }, { }, diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c index b532e2c9cf5c..f8c00b94817f 100644 --- a/drivers/i3c/master/dw-i3c-master.c +++ b/drivers/i3c/master/dw-i3c-master.c @@ -901,9 +901,6 @@ static int dw_i3c_master_reattach_i3c_dev(struct i3c_dev_desc *dev, master->regs + DEV_ADDR_TABLE_LOC(master->datstartaddr, data->index)); - if (!old_dyn_addr) - return 0; - master->addrs[data->index] = dev->info.dyn_addr; return 0; @@ -925,11 +922,11 @@ static int dw_i3c_master_attach_i3c_dev(struct i3c_dev_desc *dev) return -ENOMEM; data->index = pos; - master->addrs[pos] = dev->info.dyn_addr; + master->addrs[pos] = dev->info.dyn_addr ? : dev->info.static_addr; master->free_pos &= ~BIT(pos); i3c_dev_set_master_data(dev, data); - writel(DEV_ADDR_TABLE_DYNAMIC_ADDR(dev->info.dyn_addr), + writel(DEV_ADDR_TABLE_DYNAMIC_ADDR(master->addrs[pos]), master->regs + DEV_ADDR_TABLE_LOC(master->datstartaddr, data->index)); diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index bbd79b8b1a80..8889a4fdb454 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -1556,8 +1556,8 @@ static int cdns_i3c_master_probe(struct platform_device *pdev) return PTR_ERR(master->pclk); master->sysclk = devm_clk_get(&pdev->dev, "sysclk"); - if (IS_ERR(master->pclk)) - return PTR_ERR(master->pclk); + if (IS_ERR(master->sysclk)) + return PTR_ERR(master->sysclk); irq = platform_get_irq(pdev, 0); if (irq < 0) diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 4c8c7a620d08..a5dc13576394 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -544,7 +544,7 @@ void ide_proc_port_register_devices(ide_hwif_t *hwif) drive->proc = proc_mkdir(drive->name, parent); if (drive->proc) { ide_add_proc_entries(drive->proc, generic_drive_entries, drive); - proc_create_data("setting", S_IFREG|S_IRUSR|S_IWUSR, + proc_create_data("settings", S_IFREG|S_IRUSR|S_IWUSR, drive->proc, &ide_settings_proc_fops, drive); } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 63a7cc00bae0..84f077b2b90a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -494,7 +494,10 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); - rdma_restrack_kadd(&id_priv->res); + if (id_priv->res.kern_name) + rdma_restrack_kadd(&id_priv->res); + else + rdma_restrack_uadd(&id_priv->res); } static void cma_attach_to_dev(struct rdma_id_private *id_priv, diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index e600fc23ae62..3c97a8b6bf1e 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -584,10 +584,6 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb, if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD)) goto err; - if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) && - nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, - pd->unsafe_global_rkey)) - goto err; if (fill_res_name_pid(msg, res)) goto err; diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index be6b8e1257d0..69f8db66925e 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -106,6 +106,8 @@ int uverbs_finalize_object(struct ib_uobject *uobj, enum uverbs_obj_access access, bool commit); +int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); + void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile); void release_ufile_idr_uobject(struct ib_uverbs_file *ufile); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 6b12cc5f97b2..3317300ab036 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -60,6 +60,10 @@ static int uverbs_response(struct uverbs_attr_bundle *attrs, const void *resp, { int ret; + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CORE_OUT)) + return uverbs_copy_to_struct_or_zero( + attrs, UVERBS_ATTR_CORE_OUT, resp, resp_len); + if (copy_to_user(attrs->ucore.outbuf, resp, min(attrs->ucore.outlen, resp_len))) return -EFAULT; @@ -1181,6 +1185,9 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs) goto out_put; } + if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CORE_OUT)) + ret = uverbs_output_written(attrs, UVERBS_ATTR_CORE_OUT); + ret = 0; out_put: @@ -2012,8 +2019,10 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs) return -ENOMEM; qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs); - if (!qp) + if (!qp) { + ret = -EINVAL; goto out; + } is_ud = qp->qp_type == IB_QPT_UD; sg_ind = 0; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 8c81ff698052..0ca04d224015 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -144,6 +144,21 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, 0, uattr->len - len); } +static int uverbs_set_output(const struct uverbs_attr_bundle *bundle, + const struct uverbs_attr *attr) +{ + struct bundle_priv *pbundle = + container_of(bundle, struct bundle_priv, bundle); + u16 flags; + + flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags | + UVERBS_ATTR_F_VALID_OUTPUT; + if (put_user(flags, + &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags)) + return -EFAULT; + return 0; +} + static int uverbs_process_idrs_array(struct bundle_priv *pbundle, const struct uverbs_api_attr *attr_uapi, struct uverbs_objs_arr_attr *attr, @@ -455,6 +470,19 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, ret = handler(&pbundle->bundle); } + /* + * Until the drivers are revised to use the bundle directly we have to + * assume that the driver wrote to its UHW_OUT and flag userspace + * appropriately. + */ + if (!ret && pbundle->method_elm->has_udata) { + const struct uverbs_attr *attr = + uverbs_attr_get(&pbundle->bundle, UVERBS_ATTR_UHW_OUT); + + if (!IS_ERR(attr)) + ret = uverbs_set_output(&pbundle->bundle, attr); + } + /* * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can * not invoke the method because the request is not supported. No @@ -706,10 +734,7 @@ void uverbs_fill_udata(struct uverbs_attr_bundle *bundle, int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx, const void *from, size_t size) { - struct bundle_priv *pbundle = - container_of(bundle, struct bundle_priv, bundle); const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); - u16 flags; size_t min_size; if (IS_ERR(attr)) @@ -719,16 +744,25 @@ int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx, if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size)) return -EFAULT; - flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags | - UVERBS_ATTR_F_VALID_OUTPUT; - if (put_user(flags, - &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags)) - return -EFAULT; - - return 0; + return uverbs_set_output(bundle, attr); } EXPORT_SYMBOL(uverbs_copy_to); + +/* + * This is only used if the caller has directly used copy_to_use to write the + * data. It signals to user space that the buffer is filled in. + */ +int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + return uverbs_set_output(bundle, attr); +} + int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val) @@ -757,8 +791,10 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, { const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx); - if (clear_user(u64_to_user_ptr(attr->ptr_attr.data), - attr->ptr_attr.len)) - return -EFAULT; + if (size < attr->ptr_attr.len) { + if (clear_user(u64_to_user_ptr(attr->ptr_attr.data) + size, + attr->ptr_attr.len - size)) + return -EFAULT; + } return uverbs_copy_to(bundle, idx, from, size); } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index fb0007aa0c27..2890a77339e1 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -690,6 +690,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, buf += sizeof(hdr); + memset(bundle.attr_present, 0, sizeof(bundle.attr_present)); bundle.ufile = file; if (!method_elm->is_ex) { size_t in_len = hdr.in_words * 4 - sizeof(hdr); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 82cb6b71ac7c..e3e9dd54caa2 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -534,7 +534,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, { struct mthca_ucontext *context; - qp = kmalloc(sizeof *qp, GFP_KERNEL); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); @@ -600,7 +600,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, if (udata) return ERR_PTR(-EINVAL); - qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); + qp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 42b8685c997e..3c633ab58052 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -427,7 +427,40 @@ static inline enum ib_qp_state pvrdma_qp_state_to_ib(enum pvrdma_qp_state state) static inline enum pvrdma_wr_opcode ib_wr_opcode_to_pvrdma(enum ib_wr_opcode op) { - return (enum pvrdma_wr_opcode)op; + switch (op) { + case IB_WR_RDMA_WRITE: + return PVRDMA_WR_RDMA_WRITE; + case IB_WR_RDMA_WRITE_WITH_IMM: + return PVRDMA_WR_RDMA_WRITE_WITH_IMM; + case IB_WR_SEND: + return PVRDMA_WR_SEND; + case IB_WR_SEND_WITH_IMM: + return PVRDMA_WR_SEND_WITH_IMM; + case IB_WR_RDMA_READ: + return PVRDMA_WR_RDMA_READ; + case IB_WR_ATOMIC_CMP_AND_SWP: + return PVRDMA_WR_ATOMIC_CMP_AND_SWP; + case IB_WR_ATOMIC_FETCH_AND_ADD: + return PVRDMA_WR_ATOMIC_FETCH_AND_ADD; + case IB_WR_LSO: + return PVRDMA_WR_LSO; + case IB_WR_SEND_WITH_INV: + return PVRDMA_WR_SEND_WITH_INV; + case IB_WR_RDMA_READ_WITH_INV: + return PVRDMA_WR_RDMA_READ_WITH_INV; + case IB_WR_LOCAL_INV: + return PVRDMA_WR_LOCAL_INV; + case IB_WR_REG_MR: + return PVRDMA_WR_FAST_REG_MR; + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + return PVRDMA_WR_MASKED_ATOMIC_CMP_AND_SWP; + case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: + return PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD; + case IB_WR_REG_SIG_MR: + return PVRDMA_WR_REG_SIG_MR; + default: + return PVRDMA_WR_ERROR; + } } static inline enum ib_wc_status pvrdma_wc_status_to_ib( diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 3acf74cbe266..1ec3646087ba 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -721,6 +721,12 @@ int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) wqe_hdr->ex.imm_data = wr->ex.imm_data; + if (unlikely(wqe_hdr->opcode == PVRDMA_WR_ERROR)) { + *bad_wr = wr; + ret = -EINVAL; + goto out; + } + switch (qp->ibqp.qp_type) { case IB_QPT_GSI: case IB_QPT_UD: diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index cfc8b94527b9..aa4e431cbcd3 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -252,6 +252,8 @@ static const struct xpad_device { { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, + { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, + { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, @@ -428,6 +430,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f X-Box One controllers */ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori Controllers */ XPAD_XBOXONE_VENDOR(0x0f0d), /* Hori Controllers */ + XPAD_XBOX360_VENDOR(0x1038), /* SteelSeries Controllers */ XPAD_XBOX360_VENDOR(0x11c9), /* Nacon GC100XF */ XPAD_XBOX360_VENDOR(0x12ab), /* X-Box 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane X-Box 360 controllers */ diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 8ec483e8688b..26ec603fe220 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "../input-compat.h" @@ -405,7 +406,7 @@ static int uinput_open(struct inode *inode, struct file *file) static int uinput_validate_absinfo(struct input_dev *dev, unsigned int code, const struct input_absinfo *abs) { - int min, max; + int min, max, range; min = abs->minimum; max = abs->maximum; @@ -417,7 +418,7 @@ static int uinput_validate_absinfo(struct input_dev *dev, unsigned int code, return -EINVAL; } - if (abs->flat > max - min) { + if (!check_sub_overflow(max, min, &range) && abs->flat > range) { printk(KERN_DEBUG "%s: abs_flat #%02x out of range: %d (min:%d/max:%d)\n", UINPUT_NAME, code, abs->flat, min, max); diff --git a/drivers/input/serio/olpc_apsp.c b/drivers/input/serio/olpc_apsp.c index b36084710f69..bae08226e3d9 100644 --- a/drivers/input/serio/olpc_apsp.c +++ b/drivers/input/serio/olpc_apsp.c @@ -195,6 +195,8 @@ static int olpc_apsp_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; + priv->dev = &pdev->dev; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); priv->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(priv->base)) { @@ -248,7 +250,6 @@ static int olpc_apsp_probe(struct platform_device *pdev) goto err_irq; } - priv->dev = &pdev->dev; device_init_wakeup(priv->dev, 1); platform_set_drvdata(pdev, priv); diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index af6027cc7bbf..068dbbc610fc 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -698,7 +698,7 @@ config TOUCHSCREEN_EDT_FT5X06 config TOUCHSCREEN_RASPBERRYPI_FW tristate "Raspberry Pi's firmware base touch screen support" - depends on RASPBERRYPI_FIRMWARE || COMPILE_TEST + depends on RASPBERRYPI_FIRMWARE || (RASPBERRYPI_FIRMWARE=n && COMPILE_TEST) help Say Y here if you have the official Raspberry Pi 7 inch screen on your system. diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index d8947b28db2d..f04a6df65eb8 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -224,7 +224,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, * If we have reason to believe the IOMMU driver missed the initial * probe for dev, replay it to get things in order. */ - if (dev->bus && !device_iommu_mapped(dev)) + if (!err && dev->bus && !device_iommu_mapped(dev)) err = iommu_probe_device(dev); /* Ignore all other errors apart from EPROBE_DEFER */ diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index db20e992a40f..7f2a45445b00 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2399,13 +2399,14 @@ static void its_free_device(struct its_device *its_dev) kfree(its_dev); } -static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq) +static int its_alloc_device_irq(struct its_device *dev, int nvecs, irq_hw_number_t *hwirq) { int idx; - idx = find_first_zero_bit(dev->event_map.lpi_map, - dev->event_map.nr_lpis); - if (idx == dev->event_map.nr_lpis) + idx = bitmap_find_free_region(dev->event_map.lpi_map, + dev->event_map.nr_lpis, + get_count_order(nvecs)); + if (idx < 0) return -ENOSPC; *hwirq = dev->event_map.lpi_base + idx; @@ -2501,21 +2502,21 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, int err; int i; - for (i = 0; i < nr_irqs; i++) { - err = its_alloc_device_irq(its_dev, &hwirq); - if (err) - return err; + err = its_alloc_device_irq(its_dev, nr_irqs, &hwirq); + if (err) + return err; - err = its_irq_gic_domain_alloc(domain, virq + i, hwirq); + for (i = 0; i < nr_irqs; i++) { + err = its_irq_gic_domain_alloc(domain, virq + i, hwirq + i); if (err) return err; irq_domain_set_hwirq_and_chip(domain, virq + i, - hwirq, &its_irq_chip, its_dev); + hwirq + i, &its_irq_chip, its_dev); irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i))); pr_debug("ID:%d pID:%d vID:%d\n", - (int)(hwirq - its_dev->event_map.lpi_base), - (int) hwirq, virq + i); + (int)(hwirq + i - its_dev->event_map.lpi_base), + (int)(hwirq + i), virq + i); } return 0; diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c index ad70e7c416e3..fbfa7ff6deb1 100644 --- a/drivers/irqchip/irq-gic-v3-mbi.c +++ b/drivers/irqchip/irq-gic-v3-mbi.c @@ -24,7 +24,7 @@ struct mbi_range { unsigned long *bm; }; -static struct mutex mbi_lock; +static DEFINE_MUTEX(mbi_lock); static phys_addr_t mbi_phys_base; static struct mbi_range *mbi_ranges; static unsigned int mbi_range_nr; diff --git a/drivers/irqchip/irq-madera.c b/drivers/irqchip/irq-madera.c index e9256dee1a45..8b81271c823c 100644 --- a/drivers/irqchip/irq-madera.c +++ b/drivers/irqchip/irq-madera.c @@ -7,7 +7,6 @@ */ #include -#include #include #include #include @@ -16,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index 6edfd4bfa169..a93296b9b45d 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -822,6 +822,7 @@ out_unmap: static const struct irq_domain_ops stm32_exti_h_domain_ops = { .alloc = stm32_exti_h_domain_alloc, .free = irq_domain_free_irqs_common, + .xlate = irq_domain_xlate_twocell, }; static int diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c index 4ac378e48902..40ca1e8fa09f 100644 --- a/drivers/isdn/hardware/avm/b1.c +++ b/drivers/isdn/hardware/avm/b1.c @@ -423,7 +423,7 @@ void b1_parse_version(avmctrl_info *cinfo) int i, j; for (j = 0; j < AVM_MAXVERSION; j++) - cinfo->version[j] = "\0\0" + 1; + cinfo->version[j] = ""; for (i = 0, j = 0; j < AVM_MAXVERSION && i < cinfo->versionlen; j++, i += cinfo->versionbuf[i] + 1) diff --git a/drivers/isdn/hisax/netjet.c b/drivers/isdn/hisax/netjet.c index e932a152c405..d7b011c8d692 100644 --- a/drivers/isdn/hisax/netjet.c +++ b/drivers/isdn/hisax/netjet.c @@ -332,7 +332,7 @@ static int make_raw_data_56k(struct BCState *bcs) { bitcnt = 0; } val >>= 1; - }; + } fcs = PPP_INITFCS; for (i = 0; i < bcs->tx_skb->len; i++) { val = bcs->tx_skb->data[i]; @@ -415,7 +415,7 @@ static void read_raw(struct BCState *bcs, u_int *buf, int cnt) { else { // it's 56K mask = 0x7f; bits = 7; - }; + } for (i = 0; i < cnt; i++) { val = bcs->channel ? ((*p >> 8) & 0xff) : (*p & 0xff); p++; @@ -623,7 +623,7 @@ void netjet_fill_dma(struct BCState *bcs) else { // it's 56k if (make_raw_data_56k(bcs)) return; - }; + } if (bcs->cs->debug & L1_DEB_HSCX) debugl1(bcs->cs, "tiger fill_dma2: c%d %4lx", bcs->channel, bcs->Flag); diff --git a/drivers/isdn/hisax/q931.c b/drivers/isdn/hisax/q931.c index 298c8dba0321..6b8c3fbe3965 100644 --- a/drivers/isdn/hisax/q931.c +++ b/drivers/isdn/hisax/q931.c @@ -598,7 +598,7 @@ prcalling(char *dest, u_char *p) dp += prbits(dp, *++p, 8, 8); *dp++ = '\n'; l--; - }; + } p++; dp += sprintf(dp, " number digits "); diff --git a/drivers/isdn/hisax/st5481.h b/drivers/isdn/hisax/st5481.h index 8cd2d8277426..b421b86ca7da 100644 --- a/drivers/isdn/hisax/st5481.h +++ b/drivers/isdn/hisax/st5481.h @@ -512,7 +512,7 @@ static inline const char *ST5481_CMD_string(int evt) case ST5481_CMD_AR10: return "AR10"; case ST5481_CMD_ARL: return "ARL"; case ST5481_CMD_PDN: return "PDN"; - }; + } sprintf(s, "0x%x", evt); return s; diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c index a4597e96c916..f4253d468ae1 100644 --- a/drivers/isdn/isdnloop/isdnloop.c +++ b/drivers/isdn/isdnloop/isdnloop.c @@ -72,7 +72,7 @@ isdnloop_bchan_send(isdnloop_card *card, int ch) printk(KERN_WARNING "isdnloop: no rcard, skb dropped\n"); dev_kfree_skb(skb); - }; + } cmd.command = ISDN_STAT_BSENT; cmd.parm.length = len; card->interface.statcallb(&cmd); diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c index a2e74feee2b2..fd64df5a57a5 100644 --- a/drivers/leds/leds-lp5523.c +++ b/drivers/leds/leds-lp5523.c @@ -318,7 +318,9 @@ static int lp5523_init_program_engine(struct lp55xx_chip *chip) /* Let the programs run for couple of ms and check the engine status */ usleep_range(3000, 6000); - lp55xx_read(chip, LP5523_REG_STATUS, &status); + ret = lp55xx_read(chip, LP5523_REG_STATUS, &status); + if (ret) + return ret; status &= LP5523_ENG_STATUS_MASK; if (status != LP5523_ENG_STATUS_MASK) { diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0ff22159a0ca..47d4e0d30bf0 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2414,9 +2414,21 @@ static int crypt_ctr_cipher_new(struct dm_target *ti, char *cipher_in, char *key * capi:cipher_api_spec-iv:ivopts */ tmp = &cipher_in[strlen("capi:")]; - cipher_api = strsep(&tmp, "-"); - *ivmode = strsep(&tmp, ":"); - *ivopts = tmp; + + /* Separate IV options if present, it can contain another '-' in hash name */ + *ivopts = strrchr(tmp, ':'); + if (*ivopts) { + **ivopts = '\0'; + (*ivopts)++; + } + /* Parse IV mode */ + *ivmode = strrchr(tmp, '-'); + if (*ivmode) { + **ivmode = '\0'; + (*ivmode)++; + } + /* The rest is crypto API spec */ + cipher_api = tmp; if (*ivmode && !strcmp(*ivmode, "lmk")) cc->tfms_count = 64; @@ -2486,11 +2498,8 @@ static int crypt_ctr_cipher_old(struct dm_target *ti, char *cipher_in, char *key goto bad_mem; chainmode = strsep(&tmp, "-"); - *ivopts = strsep(&tmp, "-"); - *ivmode = strsep(&*ivopts, ":"); - - if (tmp) - DMWARN("Ignoring unexpected additional cipher options"); + *ivmode = strsep(&tmp, ":"); + *ivopts = tmp; /* * For compatibility with the original dm-crypt mapping format, if diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 20b0776e39ef..ed3caceaed07 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1678,7 +1678,7 @@ int dm_thin_remove_range(struct dm_thin_device *td, return r; } -int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) +int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) { int r; uint32_t ref_count; @@ -1686,7 +1686,7 @@ int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *resu down_read(&pmd->root_lock); r = dm_sm_get_count(pmd->data_sm, b, &ref_count); if (!r) - *result = (ref_count != 0); + *result = (ref_count > 1); up_read(&pmd->root_lock); return r; diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 35e954ea20a9..f6be0d733c20 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -195,7 +195,7 @@ int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); -int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result); +int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result); int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e); int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index dadd9696340c..ca8af21bf644 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1048,7 +1048,7 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m * passdown we have to check that these blocks are now unused. */ int r = 0; - bool used = true; + bool shared = true; struct thin_c *tc = m->tc; struct pool *pool = tc->pool; dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin; @@ -1058,11 +1058,11 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m while (b != end) { /* find start of unmapped run */ for (; b < end; b++) { - r = dm_pool_block_is_used(pool->pmd, b, &used); + r = dm_pool_block_is_shared(pool->pmd, b, &shared); if (r) goto out; - if (!used) + if (!shared) break; } @@ -1071,11 +1071,11 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m /* find end of run */ for (e = b + 1; e != end; e++) { - r = dm_pool_block_is_used(pool->pmd, e, &used); + r = dm_pool_block_is_shared(pool->pmd, e, &shared); if (r) goto out; - if (used) + if (shared) break; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d67c95ef8d7e..2b53c3841b53 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1320,7 +1320,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, __bio_clone_fast(clone, bio); - if (unlikely(bio_integrity(bio) != NULL)) { + if (bio_integrity(bio)) { int r; if (unlikely(!dm_target_has_integrity(tio->ti->type) && @@ -1336,11 +1336,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, return r; } - bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); - clone->bi_iter.bi_size = to_bytes(len); - - if (unlikely(bio_integrity(bio) != NULL)) - bio_integrity_trim(clone); + bio_trim(clone, sector - clone->bi_iter.bi_sector, len); return 0; } @@ -1588,6 +1584,9 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md, ci->sector = bio->bi_iter.bi_sector; } +#define __dm_part_stat_sub(part, field, subnd) \ + (part_stat_get(part, field) -= (subnd)) + /* * Entry point to split a bio into clones and submit them to the targets. */ @@ -1642,7 +1641,21 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count, GFP_NOIO, &md->queue->bio_split); ci.io->orig_bio = b; + + /* + * Adjust IO stats for each split, otherwise upon queue + * reentry there will be redundant IO accounting. + * NOTE: this is a stop-gap fix, a proper fix involves + * significant refactoring of DM core's bio splitting + * (by eliminating DM's splitting and just using bio_split) + */ + part_stat_lock(); + __dm_part_stat_sub(&dm_disk(md)->part0, + sectors[op_stat_group(bio_op(bio))], ci.sector_count); + part_stat_unlock(); + bio_chain(b, bio); + trace_block_split(md->queue, b, bio->bi_iter.bi_sector); ret = generic_make_request(bio); break; } @@ -1713,6 +1726,15 @@ out: return ret; } +static blk_qc_t dm_process_bio(struct mapped_device *md, + struct dm_table *map, struct bio *bio) +{ + if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED) + return __process_bio(md, map, bio); + else + return __split_and_process_bio(md, map, bio); +} + static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) { struct mapped_device *md = q->queuedata; @@ -1733,10 +1755,7 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) return ret; } - if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED) - ret = __process_bio(md, map, bio); - else - ret = __split_and_process_bio(md, map, bio); + ret = dm_process_bio(md, map, bio); dm_put_live_table(md, srcu_idx); return ret; @@ -2415,9 +2434,9 @@ static void dm_wq_work(struct work_struct *work) break; if (dm_request_based(md)) - generic_make_request(c); + (void) generic_make_request(c); else - __split_and_process_bio(md, map, c); + (void) dm_process_bio(md, map, c); } dm_put_live_table(md, srcu_idx); diff --git a/drivers/md/md.c b/drivers/md/md.c index fd4af4de03b4..05ffffb8b769 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -207,15 +207,10 @@ static bool create_on_open = true; struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, struct mddev *mddev) { - struct bio *b; - if (!mddev || !bioset_initialized(&mddev->bio_set)) return bio_alloc(gfp_mask, nr_iovecs); - b = bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set); - if (!b) - return NULL; - return b; + return bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set); } EXPORT_SYMBOL_GPL(bio_alloc_mddev); diff --git a/drivers/media/platform/vim2m.c b/drivers/media/platform/vim2m.c index d01821a6906a..89d9c4c21037 100644 --- a/drivers/media/platform/vim2m.c +++ b/drivers/media/platform/vim2m.c @@ -807,7 +807,9 @@ static void vim2m_stop_streaming(struct vb2_queue *q) struct vb2_v4l2_buffer *vbuf; unsigned long flags; - cancel_delayed_work_sync(&dev->work_run); + if (v4l2_m2m_get_curr_priv(dev->m2m_dev) == ctx) + cancel_delayed_work_sync(&dev->work_run); + for (;;) { if (V4L2_TYPE_IS_OUTPUT(q->type)) vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 1441a73ce64c..90aad465f9ed 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -287,6 +287,7 @@ static void v4l_print_format(const void *arg, bool write_only) const struct v4l2_window *win; const struct v4l2_sdr_format *sdr; const struct v4l2_meta_format *meta; + u32 planes; unsigned i; pr_cont("type=%s", prt_names(p->type, v4l2_type_names)); @@ -317,7 +318,8 @@ static void v4l_print_format(const void *arg, bool write_only) prt_names(mp->field, v4l2_field_names), mp->colorspace, mp->num_planes, mp->flags, mp->ycbcr_enc, mp->quantization, mp->xfer_func); - for (i = 0; i < mp->num_planes; i++) + planes = min_t(u32, mp->num_planes, VIDEO_MAX_PLANES); + for (i = 0; i < planes; i++) printk(KERN_DEBUG "plane %u: bytesperline=%u sizeimage=%u\n", i, mp->plane_fmt[i].bytesperline, mp->plane_fmt[i].sizeimage); @@ -1551,8 +1553,11 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, if (unlikely(!ops->vidioc_s_fmt_vid_cap_mplane)) break; CLEAR_AFTER_FIELD(p, fmt.pix_mp.xfer_func); + if (p->fmt.pix_mp.num_planes > VIDEO_MAX_PLANES) + break; for (i = 0; i < p->fmt.pix_mp.num_planes; i++) - CLEAR_AFTER_FIELD(p, fmt.pix_mp.plane_fmt[i].bytesperline); + CLEAR_AFTER_FIELD(&p->fmt.pix_mp.plane_fmt[i], + bytesperline); return ops->vidioc_s_fmt_vid_cap_mplane(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OVERLAY: if (unlikely(!ops->vidioc_s_fmt_vid_overlay)) @@ -1581,8 +1586,11 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, if (unlikely(!ops->vidioc_s_fmt_vid_out_mplane)) break; CLEAR_AFTER_FIELD(p, fmt.pix_mp.xfer_func); + if (p->fmt.pix_mp.num_planes > VIDEO_MAX_PLANES) + break; for (i = 0; i < p->fmt.pix_mp.num_planes; i++) - CLEAR_AFTER_FIELD(p, fmt.pix_mp.plane_fmt[i].bytesperline); + CLEAR_AFTER_FIELD(&p->fmt.pix_mp.plane_fmt[i], + bytesperline); return ops->vidioc_s_fmt_vid_out_mplane(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: if (unlikely(!ops->vidioc_s_fmt_vid_out_overlay)) @@ -1648,8 +1656,11 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, if (unlikely(!ops->vidioc_try_fmt_vid_cap_mplane)) break; CLEAR_AFTER_FIELD(p, fmt.pix_mp.xfer_func); + if (p->fmt.pix_mp.num_planes > VIDEO_MAX_PLANES) + break; for (i = 0; i < p->fmt.pix_mp.num_planes; i++) - CLEAR_AFTER_FIELD(p, fmt.pix_mp.plane_fmt[i].bytesperline); + CLEAR_AFTER_FIELD(&p->fmt.pix_mp.plane_fmt[i], + bytesperline); return ops->vidioc_try_fmt_vid_cap_mplane(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OVERLAY: if (unlikely(!ops->vidioc_try_fmt_vid_overlay)) @@ -1678,8 +1689,11 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, if (unlikely(!ops->vidioc_try_fmt_vid_out_mplane)) break; CLEAR_AFTER_FIELD(p, fmt.pix_mp.xfer_func); + if (p->fmt.pix_mp.num_planes > VIDEO_MAX_PLANES) + break; for (i = 0; i < p->fmt.pix_mp.num_planes; i++) - CLEAR_AFTER_FIELD(p, fmt.pix_mp.plane_fmt[i].bytesperline); + CLEAR_AFTER_FIELD(&p->fmt.pix_mp.plane_fmt[i], + bytesperline); return ops->vidioc_try_fmt_vid_out_mplane(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: if (unlikely(!ops->vidioc_try_fmt_vid_out_overlay)) diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c index b8aaa684c397..2ed23c99f59f 100644 --- a/drivers/misc/ibmvmc.c +++ b/drivers/misc/ibmvmc.c @@ -820,21 +820,24 @@ static int ibmvmc_send_msg(struct crq_server_adapter *adapter, * * Return: * 0 - Success + * Non-zero - Failure */ static int ibmvmc_open(struct inode *inode, struct file *file) { struct ibmvmc_file_session *session; - int rc = 0; pr_debug("%s: inode = 0x%lx, file = 0x%lx, state = 0x%x\n", __func__, (unsigned long)inode, (unsigned long)file, ibmvmc.state); session = kzalloc(sizeof(*session), GFP_KERNEL); + if (!session) + return -ENOMEM; + session->file = file; file->private_data = session; - return rc; + return 0; } /** diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c index 78c26cebf5d4..8f7616557c97 100644 --- a/drivers/misc/mei/hbm.c +++ b/drivers/misc/mei/hbm.c @@ -1187,9 +1187,15 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) dma_setup_res = (struct hbm_dma_setup_response *)mei_msg; if (dma_setup_res->status) { - dev_info(dev->dev, "hbm: dma setup response: failure = %d %s\n", - dma_setup_res->status, - mei_hbm_status_str(dma_setup_res->status)); + u8 status = dma_setup_res->status; + + if (status == MEI_HBMS_NOT_ALLOWED) { + dev_dbg(dev->dev, "hbm: dma setup not allowed\n"); + } else { + dev_info(dev->dev, "hbm: dma setup response: failure = %d %s\n", + status, + mei_hbm_status_str(status)); + } dev->hbm_f_dr_supported = 0; mei_dmam_ring_free(dev); } diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index e4b10b2d1a08..23739a60517f 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -127,6 +127,8 @@ #define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */ #define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */ +#define MEI_DEV_ID_DNV_IE 0x19E5 /* Denverton IE */ + #define MEI_DEV_ID_GLK 0x319A /* Gemini Lake */ #define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 73ace2d59dea..e89497f858ae 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -88,11 +88,13 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_DNV_IE, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_GLK, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)}, diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c index 6b212c8b78e7..2bfa3a903bf9 100644 --- a/drivers/misc/mic/vop/vop_main.c +++ b/drivers/misc/mic/vop/vop_main.c @@ -394,16 +394,21 @@ static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs, struct _vop_vdev *vdev = to_vopvdev(dev); struct vop_device *vpdev = vdev->vpdev; struct mic_device_ctrl __iomem *dc = vdev->dc; - int i, err, retry; + int i, err, retry, queue_idx = 0; /* We must have this many virtqueues. */ if (nvqs > ioread8(&vdev->desc->num_vq)) return -ENOENT; for (i = 0; i < nvqs; ++i) { + if (!names[i]) { + vqs[i] = NULL; + continue; + } + dev_dbg(_vop_dev(vdev), "%s: %d: %s\n", __func__, i, names[i]); - vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i], + vqs[i] = vop_find_vq(dev, queue_idx++, callbacks[i], names[i], ctx ? ctx[i] : false); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); diff --git a/drivers/misc/pvpanic.c b/drivers/misc/pvpanic.c index 595ac065b401..95ff7c5a1dfb 100644 --- a/drivers/misc/pvpanic.c +++ b/drivers/misc/pvpanic.c @@ -70,8 +70,12 @@ pvpanic_walk_resources(struct acpi_resource *res, void *context) struct resource r; if (acpi_dev_resource_io(res, &r)) { +#ifdef CONFIG_HAS_IOPORT_MAP base = ioport_map(r.start, resource_size(&r)); return AE_OK; +#else + return AE_ERROR; +#endif } else if (acpi_dev_resource_memory(res, &r)) { base = ioremap(r.start, resource_size(&r)); return AE_OK; diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index e26b8145efb3..a44ec8bb5418 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -116,7 +116,7 @@ config MMC_RICOH_MMC config MMC_SDHCI_ACPI tristate "SDHCI support for ACPI enumerated SDHCI controllers" - depends on MMC_SDHCI && ACPI + depends on MMC_SDHCI && ACPI && PCI select IOSF_MBI if X86 help This selects support for ACPI enumerated SDHCI controllers, @@ -978,7 +978,7 @@ config MMC_SDHCI_OMAP tristate "TI SDHCI Controller Support" depends on MMC_SDHCI_PLTFM && OF select THERMAL - select TI_SOC_THERMAL + imply TI_SOC_THERMAL help This selects the Secure Digital Host Controller Interface (SDHCI) support present in TI's DRA7 SOCs. The controller supports diff --git a/drivers/mmc/host/dw_mmc-bluefield.c b/drivers/mmc/host/dw_mmc-bluefield.c index ed8f2254b66a..aa38b1a8017e 100644 --- a/drivers/mmc/host/dw_mmc-bluefield.c +++ b/drivers/mmc/host/dw_mmc-bluefield.c @@ -1,11 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2018 Mellanox Technologies. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index c2690c1a50ff..f19ec60bcbdc 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -179,6 +179,8 @@ struct meson_host { struct sd_emmc_desc *descs; dma_addr_t descs_dma_addr; + int irq; + bool vqmmc_enabled; }; @@ -738,6 +740,11 @@ static int meson_mmc_clk_phase_tuning(struct mmc_host *mmc, u32 opcode, static int meson_mmc_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct meson_host *host = mmc_priv(mmc); + int adj = 0; + + /* enable signal resampling w/o delay */ + adj = ADJUST_ADJ_EN; + writel(adj, host->regs + host->data->adjust); return meson_mmc_clk_phase_tuning(mmc, opcode, host->rx_clk); } @@ -768,6 +775,9 @@ static void meson_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (!IS_ERR(mmc->supply.vmmc)) mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd); + /* disable signal resampling */ + writel(0, host->regs + host->data->adjust); + /* Reset rx phase */ clk_set_phase(host->rx_clk, 0); @@ -1166,7 +1176,7 @@ static int meson_mmc_get_cd(struct mmc_host *mmc) static void meson_mmc_cfg_init(struct meson_host *host) { - u32 cfg = 0, adj = 0; + u32 cfg = 0; cfg |= FIELD_PREP(CFG_RESP_TIMEOUT_MASK, ilog2(SD_EMMC_CFG_RESP_TIMEOUT)); @@ -1177,10 +1187,6 @@ static void meson_mmc_cfg_init(struct meson_host *host) cfg |= CFG_ERR_ABORT; writel(cfg, host->regs + SD_EMMC_CFG); - - /* enable signal resampling w/o delay */ - adj = ADJUST_ADJ_EN; - writel(adj, host->regs + host->data->adjust); } static int meson_mmc_card_busy(struct mmc_host *mmc) @@ -1231,7 +1237,7 @@ static int meson_mmc_probe(struct platform_device *pdev) struct resource *res; struct meson_host *host; struct mmc_host *mmc; - int ret, irq; + int ret; mmc = mmc_alloc_host(sizeof(struct meson_host), &pdev->dev); if (!mmc) @@ -1276,8 +1282,8 @@ static int meson_mmc_probe(struct platform_device *pdev) goto free_host; } - irq = platform_get_irq(pdev, 0); - if (irq <= 0) { + host->irq = platform_get_irq(pdev, 0); + if (host->irq <= 0) { dev_err(&pdev->dev, "failed to get interrupt resource.\n"); ret = -EINVAL; goto free_host; @@ -1331,9 +1337,8 @@ static int meson_mmc_probe(struct platform_device *pdev) writel(IRQ_CRC_ERR | IRQ_TIMEOUTS | IRQ_END_OF_CHAIN, host->regs + SD_EMMC_IRQ_EN); - ret = devm_request_threaded_irq(&pdev->dev, irq, meson_mmc_irq, - meson_mmc_irq_thread, IRQF_SHARED, - NULL, host); + ret = request_threaded_irq(host->irq, meson_mmc_irq, + meson_mmc_irq_thread, IRQF_SHARED, NULL, host); if (ret) goto err_init_clk; @@ -1351,7 +1356,7 @@ static int meson_mmc_probe(struct platform_device *pdev) if (host->bounce_buf == NULL) { dev_err(host->dev, "Unable to map allocate DMA bounce buffer.\n"); ret = -ENOMEM; - goto err_init_clk; + goto err_free_irq; } host->descs = dma_alloc_coherent(host->dev, SD_EMMC_DESC_BUF_LEN, @@ -1370,6 +1375,8 @@ static int meson_mmc_probe(struct platform_device *pdev) err_bounce_buf: dma_free_coherent(host->dev, host->bounce_buf_size, host->bounce_buf, host->bounce_dma_addr); +err_free_irq: + free_irq(host->irq, host); err_init_clk: clk_disable_unprepare(host->mmc_clk); err_core_clk: @@ -1387,6 +1394,7 @@ static int meson_mmc_remove(struct platform_device *pdev) /* disable interrupts */ writel(0, host->regs + SD_EMMC_IRQ_EN); + free_irq(host->irq, host); dma_free_coherent(host->dev, SD_EMMC_DESC_BUF_LEN, host->descs, host->descs_dma_addr); diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 0db99057c44f..9d12c06c7fd6 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -296,7 +296,10 @@ static int sdhci_iproc_probe(struct platform_device *pdev) iproc_host->data = iproc_data; - mmc_of_parse(host->mmc); + ret = mmc_of_parse(host->mmc); + if (ret) + goto err; + sdhci_get_property(pdev); host->mmc->caps |= iproc_host->data->mmc_caps; diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c index eebac35304c6..6e8edc9375dd 100644 --- a/drivers/mtd/nand/raw/denali.c +++ b/drivers/mtd/nand/raw/denali.c @@ -1322,7 +1322,7 @@ int denali_init(struct denali_nand_info *denali) } /* clk rate info is needed for setup_data_interface */ - if (denali->clk_rate && denali->clk_x_rate) + if (!denali->clk_rate || !denali->clk_x_rate) chip->options |= NAND_KEEP_TIMINGS; chip->legacy.dummy_controller.ops = &denali_controller_ops; diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index 325b4414dccc..c9149a37f8f0 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -593,23 +593,6 @@ static void fsmc_write_buf_dma(struct fsmc_nand_data *host, const u8 *buf, dma_xfer(host, (void *)buf, len, DMA_TO_DEVICE); } -/* fsmc_select_chip - assert or deassert nCE */ -static void fsmc_ce_ctrl(struct fsmc_nand_data *host, bool assert) -{ - u32 pc = readl(host->regs_va + FSMC_PC); - - if (!assert) - writel_relaxed(pc & ~FSMC_ENABLE, host->regs_va + FSMC_PC); - else - writel_relaxed(pc | FSMC_ENABLE, host->regs_va + FSMC_PC); - - /* - * nCE line changes must be applied before returning from this - * function. - */ - mb(); -} - /* * fsmc_exec_op - hook called by the core to execute NAND operations * @@ -627,8 +610,6 @@ static int fsmc_exec_op(struct nand_chip *chip, const struct nand_operation *op, pr_debug("Executing operation [%d instructions]:\n", op->ninstrs); - fsmc_ce_ctrl(host, true); - for (op_id = 0; op_id < op->ninstrs; op_id++) { instr = &op->instrs[op_id]; @@ -686,8 +667,6 @@ static int fsmc_exec_op(struct nand_chip *chip, const struct nand_operation *op, } } - fsmc_ce_ctrl(host, false); - return ret; } diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c index f92ae5aa2a54..9526d5b23c80 100644 --- a/drivers/mtd/nand/raw/jz4740_nand.c +++ b/drivers/mtd/nand/raw/jz4740_nand.c @@ -260,7 +260,7 @@ static int jz_nand_correct_ecc_rs(struct nand_chip *chip, uint8_t *dat, } static int jz_nand_ioremap_resource(struct platform_device *pdev, - const char *name, struct resource **res, void *__iomem *base) + const char *name, struct resource **res, void __iomem **base) { int ret; diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 6371958dd170..edb1c023a753 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -519,7 +519,7 @@ config NET_FAILOVER and destroy a failover master netdev and manages a primary and standby slave netdevs that get registered via the generic failover infrastructure. This can be used by paravirtual drivers to enable - an alternate low latency datapath. It alsoenables live migration of + an alternate low latency datapath. It also enables live migration of a VM with direct attached VF by failing over to the paravirtual datapath when the VF is unplugged. diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 7c46d9f4fefd..9274dcc6e9b0 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -31,6 +31,7 @@ #include #include #include +#include /* General definitions */ #define AD_SHORT_TIMEOUT 1 @@ -851,6 +852,9 @@ static int ad_lacpdu_send(struct port *port) if (!skb) return -ENOMEM; + atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_tx); + atomic64_inc(&BOND_AD_INFO(slave->bond).stats.lacpdu_tx); + skb->dev = slave->dev; skb_reset_mac_header(skb); skb->network_header = skb->mac_header + ETH_HLEN; @@ -892,6 +896,17 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker) if (!skb) return -ENOMEM; + switch (marker->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_tx); + atomic64_inc(&BOND_AD_INFO(slave->bond).stats.marker_tx); + break; + case AD_MARKER_RESPONSE_SUBTYPE: + atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_resp_tx); + atomic64_inc(&BOND_AD_INFO(slave->bond).stats.marker_resp_tx); + break; + } + skb_reserve(skb, 16); skb->dev = slave->dev; @@ -1086,6 +1101,10 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port) */ last_state = port->sm_rx_state; + if (lacpdu) { + atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.lacpdu_rx); + atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.lacpdu_rx); + } /* check if state machine should change state */ /* first, check if port was reinitialized */ @@ -1922,6 +1941,9 @@ static void ad_marker_info_received(struct bond_marker *marker_info, { struct bond_marker marker; + atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_rx); + atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.marker_rx); + /* copy the received marker data to the response marker */ memcpy(&marker, marker_info, sizeof(struct bond_marker)); /* change the marker subtype to marker response */ @@ -1946,6 +1968,9 @@ static void ad_marker_info_received(struct bond_marker *marker_info, static void ad_marker_response_received(struct bond_marker *marker, struct port *port) { + atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_resp_rx); + atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.marker_resp_rx); + /* DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW */ } @@ -2348,66 +2373,68 @@ re_arm: * bond_3ad_rx_indication - handle a received frame * @lacpdu: received lacpdu * @slave: slave struct to work on - * @length: length of the data received * * It is assumed that frames that were sent on this NIC don't returned as new * received frames (loopback). Since only the payload is given to this * function, it check for loopback. */ -static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, - u16 length) +static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave) { - struct port *port; + struct bonding *bond = slave->bond; int ret = RX_HANDLER_ANOTHER; + struct bond_marker *marker; + struct port *port; + atomic64_t *stat; - if (length >= sizeof(struct lacpdu)) { - - port = &(SLAVE_AD_INFO(slave)->port); - - if (!port->slave) { - net_warn_ratelimited("%s: Warning: port of slave %s is uninitialized\n", - slave->dev->name, slave->bond->dev->name); - return ret; - } - - switch (lacpdu->subtype) { - case AD_TYPE_LACPDU: - ret = RX_HANDLER_CONSUMED; - netdev_dbg(slave->bond->dev, - "Received LACPDU on port %d slave %s\n", - port->actor_port_number, - slave->dev->name); - /* Protect against concurrent state machines */ - spin_lock(&slave->bond->mode_lock); - ad_rx_machine(lacpdu, port); - spin_unlock(&slave->bond->mode_lock); - break; - - case AD_TYPE_MARKER: - ret = RX_HANDLER_CONSUMED; - /* No need to convert fields to Little Endian since we - * don't use the marker's fields. - */ - - switch (((struct bond_marker *)lacpdu)->tlv_type) { - case AD_MARKER_INFORMATION_SUBTYPE: - netdev_dbg(slave->bond->dev, "Received Marker Information on port %d\n", - port->actor_port_number); - ad_marker_info_received((struct bond_marker *)lacpdu, port); - break; - - case AD_MARKER_RESPONSE_SUBTYPE: - netdev_dbg(slave->bond->dev, "Received Marker Response on port %d\n", - port->actor_port_number); - ad_marker_response_received((struct bond_marker *)lacpdu, port); - break; - - default: - netdev_dbg(slave->bond->dev, "Received an unknown Marker subtype on slot %d\n", - port->actor_port_number); - } - } + port = &(SLAVE_AD_INFO(slave)->port); + if (!port->slave) { + net_warn_ratelimited("%s: Warning: port of slave %s is uninitialized\n", + slave->dev->name, slave->bond->dev->name); + return ret; } + + switch (lacpdu->subtype) { + case AD_TYPE_LACPDU: + ret = RX_HANDLER_CONSUMED; + netdev_dbg(slave->bond->dev, + "Received LACPDU on port %d slave %s\n", + port->actor_port_number, slave->dev->name); + /* Protect against concurrent state machines */ + spin_lock(&slave->bond->mode_lock); + ad_rx_machine(lacpdu, port); + spin_unlock(&slave->bond->mode_lock); + break; + case AD_TYPE_MARKER: + ret = RX_HANDLER_CONSUMED; + /* No need to convert fields to Little Endian since we + * don't use the marker's fields. + */ + marker = (struct bond_marker *)lacpdu; + switch (marker->tlv_type) { + case AD_MARKER_INFORMATION_SUBTYPE: + netdev_dbg(slave->bond->dev, "Received Marker Information on port %d\n", + port->actor_port_number); + ad_marker_info_received(marker, port); + break; + case AD_MARKER_RESPONSE_SUBTYPE: + netdev_dbg(slave->bond->dev, "Received Marker Response on port %d\n", + port->actor_port_number); + ad_marker_response_received(marker, port); + break; + default: + netdev_dbg(slave->bond->dev, "Received an unknown Marker subtype on slot %d\n", + port->actor_port_number); + stat = &SLAVE_AD_INFO(slave)->stats.marker_unknown_rx; + atomic64_inc(stat); + stat = &BOND_AD_INFO(bond).stats.marker_unknown_rx; + atomic64_inc(stat); + } + break; + default: + atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_unknown_rx); + atomic64_inc(&BOND_AD_INFO(bond).stats.lacpdu_unknown_rx); + } + return ret; } @@ -2643,10 +2670,13 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, return RX_HANDLER_ANOTHER; lacpdu = skb_header_pointer(skb, 0, sizeof(_lacpdu), &_lacpdu); - if (!lacpdu) + if (!lacpdu) { + atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_illegal_rx); + atomic64_inc(&BOND_AD_INFO(bond).stats.lacpdu_illegal_rx); return RX_HANDLER_ANOTHER; + } - return bond_3ad_rx_indication(lacpdu, slave, skb->len); + return bond_3ad_rx_indication(lacpdu, slave); } /** @@ -2678,3 +2708,61 @@ void bond_3ad_update_lacp_rate(struct bonding *bond) } spin_unlock_bh(&bond->mode_lock); } + +size_t bond_3ad_stats_size(void) +{ + return nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_RX */ + nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_TX */ + nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_UNKNOWN_RX */ + nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_ILLEGAL_RX */ + nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RX */ + nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_TX */ + nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RESP_RX */ + nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RESP_TX */ + nla_total_size_64bit(sizeof(u64)); /* BOND_3AD_STAT_MARKER_UNKNOWN_RX */ +} + +int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats) +{ + u64 val; + + val = atomic64_read(&stats->lacpdu_rx); + if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_RX, val, + BOND_3AD_STAT_PAD)) + return -EMSGSIZE; + val = atomic64_read(&stats->lacpdu_tx); + if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_TX, val, + BOND_3AD_STAT_PAD)) + return -EMSGSIZE; + val = atomic64_read(&stats->lacpdu_unknown_rx); + if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_UNKNOWN_RX, val, + BOND_3AD_STAT_PAD)) + return -EMSGSIZE; + val = atomic64_read(&stats->lacpdu_illegal_rx); + if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_ILLEGAL_RX, val, + BOND_3AD_STAT_PAD)) + return -EMSGSIZE; + + val = atomic64_read(&stats->marker_rx); + if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RX, val, + BOND_3AD_STAT_PAD)) + return -EMSGSIZE; + val = atomic64_read(&stats->marker_tx); + if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_TX, val, + BOND_3AD_STAT_PAD)) + return -EMSGSIZE; + val = atomic64_read(&stats->marker_resp_rx); + if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RESP_RX, val, + BOND_3AD_STAT_PAD)) + return -EMSGSIZE; + val = atomic64_read(&stats->marker_resp_tx); + if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RESP_TX, val, + BOND_3AD_STAT_PAD)) + return -EMSGSIZE; + val = atomic64_read(&stats->marker_unknown_rx); + if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_UNKNOWN_RX, val, + BOND_3AD_STAT_PAD)) + return -EMSGSIZE; + + return 0; +} diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 6b9ad8673218..b286f591242e 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -675,6 +675,71 @@ nla_put_failure: return -EMSGSIZE; } +static size_t bond_get_linkxstats_size(const struct net_device *dev, int attr) +{ + switch (attr) { + case IFLA_STATS_LINK_XSTATS: + case IFLA_STATS_LINK_XSTATS_SLAVE: + break; + default: + return 0; + } + + return bond_3ad_stats_size() + nla_total_size(0); +} + +static int bond_fill_linkxstats(struct sk_buff *skb, + const struct net_device *dev, + int *prividx, int attr) +{ + struct nlattr *nla __maybe_unused; + struct slave *slave = NULL; + struct nlattr *nest, *nest2; + struct bonding *bond; + + switch (attr) { + case IFLA_STATS_LINK_XSTATS: + bond = netdev_priv(dev); + break; + case IFLA_STATS_LINK_XSTATS_SLAVE: + slave = bond_slave_get_rtnl(dev); + if (!slave) + return 0; + bond = slave->bond; + break; + default: + return -EINVAL; + } + + nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BOND); + if (!nest) + return -EMSGSIZE; + if (BOND_MODE(bond) == BOND_MODE_8023AD) { + struct bond_3ad_stats *stats; + + if (slave) + stats = &SLAVE_AD_INFO(slave)->stats; + else + stats = &BOND_AD_INFO(bond).stats; + + nest2 = nla_nest_start(skb, BOND_XSTATS_3AD); + if (!nest2) { + nla_nest_end(skb, nest); + return -EMSGSIZE; + } + + if (bond_3ad_stats_fill(skb, stats)) { + nla_nest_cancel(skb, nest2); + nla_nest_end(skb, nest); + return -EMSGSIZE; + } + nla_nest_end(skb, nest2); + } + nla_nest_end(skb, nest); + + return 0; +} + struct rtnl_link_ops bond_link_ops __read_mostly = { .kind = "bond", .priv_size = sizeof(struct bonding), @@ -689,6 +754,8 @@ struct rtnl_link_ops bond_link_ops __read_mostly = { .get_num_tx_queues = bond_get_num_tx_queues, .get_num_rx_queues = bond_get_num_tx_queues, /* Use the same number as for TX queues */ + .fill_linkxstats = bond_fill_linkxstats, + .get_linkxstats_size = bond_get_linkxstats_size, .slave_maxtype = IFLA_BOND_SLAVE_MAX, .slave_policy = bond_slave_policy, .slave_changelink = bond_slave_changelink, diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index a0f954f36c09..44e6c7b1b222 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -257,10 +257,7 @@ static int handle_tx(struct ser_device *ser) if (skb->len == 0) { struct sk_buff *tmp = skb_dequeue(&ser->head); WARN_ON(tmp != skb); - if (in_interrupt()) - dev_kfree_skb_irq(skb); - else - kfree_skb(skb); + dev_consume_skb_any(skb); } } /* Send flow off if queue is empty */ diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 3b3f88ffab53..c05e4d50d43d 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -480,8 +480,6 @@ EXPORT_SYMBOL_GPL(can_put_echo_skb); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) { struct can_priv *priv = netdev_priv(dev); - struct sk_buff *skb = priv->echo_skb[idx]; - struct canfd_frame *cf; if (idx >= priv->echo_skb_max) { netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", @@ -489,20 +487,21 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 return NULL; } - if (!skb) { - netdev_err(dev, "%s: BUG! Trying to echo non existing skb: can_priv::echo_skb[%u]\n", - __func__, idx); - return NULL; + if (priv->echo_skb[idx]) { + /* Using "struct canfd_frame::len" for the frame + * length is supported on both CAN and CANFD frames. + */ + struct sk_buff *skb = priv->echo_skb[idx]; + struct canfd_frame *cf = (struct canfd_frame *)skb->data; + u8 len = cf->len; + + *len_ptr = len; + priv->echo_skb[idx] = NULL; + + return skb; } - /* Using "struct canfd_frame::len" for the frame - * length is supported on both CAN and CANFD frames. - */ - cf = (struct canfd_frame *)skb->data; - *len_ptr = cf->len; - priv->echo_skb[idx] = NULL; - - return skb; + return NULL; } /* diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 0f36eafe3ac1..1c66fb2ad76b 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1106,7 +1106,7 @@ static int flexcan_chip_start(struct net_device *dev) } } else { /* clear and invalidate unused mailboxes first */ - for (i = FLEXCAN_TX_MB_RESERVED_OFF_FIFO; i <= priv->mb_count; i++) { + for (i = FLEXCAN_TX_MB_RESERVED_OFF_FIFO; i < priv->mb_count; i++) { mb = flexcan_get_mb(priv, i); priv->write(FLEXCAN_MB_CODE_RX_INACTIVE, &mb->can_ctrl); @@ -1432,7 +1432,7 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev) gpr_np = of_find_node_by_phandle(phandle); if (!gpr_np) { dev_dbg(&pdev->dev, "could not find gpr node by phandle\n"); - return PTR_ERR(gpr_np); + return -ENODEV; } priv = netdev_priv(dev); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index a8a2c728afba..c2b61500f958 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -621,17 +621,19 @@ static void mt7530_adjust_link(struct dsa_switch *ds, int port, struct mt7530_priv *priv = ds->priv; if (phy_is_pseudo_fixed_link(phydev)) { - dev_dbg(priv->dev, "phy-mode for master device = %x\n", - phydev->interface); + if (priv->id == ID_MT7530) { + dev_dbg(priv->dev, "phy-mode for master device = %x\n", + phydev->interface); - /* Setup TX circuit incluing relevant PAD and driving */ - mt7530_pad_clk_setup(ds, phydev->interface); + /* Setup TX circuit incluing relevant PAD and driving */ + mt7530_pad_clk_setup(ds, phydev->interface); - /* Setup RX circuit, relevant PAD and driving on the host - * which must be placed after the setup on the device side is - * all finished. - */ - mt7623_pad_clk_setup(ds); + /* Setup RX circuit, relevant PAD and driving on the + * host which must be placed after the setup on the + * device side is all finished. + */ + mt7623_pad_clk_setup(ds); + } } else { u16 lcl_adv = 0, rmt_adv = 0; u8 flowctrl; @@ -687,6 +689,10 @@ mt7530_cpu_port_enable(struct mt7530_priv *priv, /* Unknown unicast frame fordwarding to the cpu port */ mt7530_set(priv, MT7530_MFC, UNU_FFP(BIT(port))); + /* Set CPU port number */ + if (priv->id == ID_MT7621) + mt7530_rmw(priv, MT7530_MFC, CPU_MASK, CPU_EN | CPU_PORT(port)); + /* CPU port gets connected to all user ports of * the switch */ @@ -1219,24 +1225,27 @@ mt7530_setup(struct dsa_switch *ds) * as two netdev instances. */ dn = ds->ports[MT7530_CPU_PORT].master->dev.of_node->parent; - priv->ethernet = syscon_node_to_regmap(dn); - if (IS_ERR(priv->ethernet)) - return PTR_ERR(priv->ethernet); - regulator_set_voltage(priv->core_pwr, 1000000, 1000000); - ret = regulator_enable(priv->core_pwr); - if (ret < 0) { - dev_err(priv->dev, - "Failed to enable core power: %d\n", ret); - return ret; - } + if (priv->id == ID_MT7530) { + priv->ethernet = syscon_node_to_regmap(dn); + if (IS_ERR(priv->ethernet)) + return PTR_ERR(priv->ethernet); - regulator_set_voltage(priv->io_pwr, 3300000, 3300000); - ret = regulator_enable(priv->io_pwr); - if (ret < 0) { - dev_err(priv->dev, "Failed to enable io pwr: %d\n", - ret); - return ret; + regulator_set_voltage(priv->core_pwr, 1000000, 1000000); + ret = regulator_enable(priv->core_pwr); + if (ret < 0) { + dev_err(priv->dev, + "Failed to enable core power: %d\n", ret); + return ret; + } + + regulator_set_voltage(priv->io_pwr, 3300000, 3300000); + ret = regulator_enable(priv->io_pwr); + if (ret < 0) { + dev_err(priv->dev, "Failed to enable io pwr: %d\n", + ret); + return ret; + } } /* Reset whole chip through gpio pin or memory-mapped registers for @@ -1326,6 +1335,13 @@ static const struct dsa_switch_ops mt7530_switch_ops = { .port_vlan_del = mt7530_port_vlan_del, }; +static const struct of_device_id mt7530_of_match[] = { + { .compatible = "mediatek,mt7621", .data = (void *)ID_MT7621, }, + { .compatible = "mediatek,mt7530", .data = (void *)ID_MT7530, }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, mt7530_of_match); + static int mt7530_probe(struct mdio_device *mdiodev) { @@ -1356,13 +1372,21 @@ mt7530_probe(struct mdio_device *mdiodev) } } - priv->core_pwr = devm_regulator_get(&mdiodev->dev, "core"); - if (IS_ERR(priv->core_pwr)) - return PTR_ERR(priv->core_pwr); + /* Get the hardware identifier from the devicetree node. + * We will need it for some of the clock and regulator setup. + */ + priv->id = (unsigned int)(unsigned long) + of_device_get_match_data(&mdiodev->dev); - priv->io_pwr = devm_regulator_get(&mdiodev->dev, "io"); - if (IS_ERR(priv->io_pwr)) - return PTR_ERR(priv->io_pwr); + if (priv->id == ID_MT7530) { + priv->core_pwr = devm_regulator_get(&mdiodev->dev, "core"); + if (IS_ERR(priv->core_pwr)) + return PTR_ERR(priv->core_pwr); + + priv->io_pwr = devm_regulator_get(&mdiodev->dev, "io"); + if (IS_ERR(priv->io_pwr)) + return PTR_ERR(priv->io_pwr); + } /* Not MCM that indicates switch works as the remote standalone * integrated circuit so the GPIO pin would be used to complete @@ -1408,12 +1432,6 @@ mt7530_remove(struct mdio_device *mdiodev) mutex_destroy(&priv->reg_mutex); } -static const struct of_device_id mt7530_of_match[] = { - { .compatible = "mediatek,mt7530" }, - { /* sentinel */ }, -}; -MODULE_DEVICE_TABLE(of, mt7530_of_match); - static struct mdio_driver mt7530_mdio_driver = { .probe = mt7530_probe, .remove = mt7530_remove, diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index d9b407a22a58..a95ed958df5b 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -19,6 +19,11 @@ #define MT7530_NUM_FDB_RECORDS 2048 #define MT7530_ALL_MEMBERS 0xff +enum { + ID_MT7530 = 0, + ID_MT7621 = 1, +}; + #define NUM_TRGMII_CTRL 5 #define TRGMII_BASE(x) (0x10000 + (x)) @@ -36,6 +41,9 @@ #define UNM_FFP(x) (((x) & 0xff) << 16) #define UNU_FFP(x) (((x) & 0xff) << 8) #define UNU_FFP_MASK UNU_FFP(~0) +#define CPU_EN BIT(7) +#define CPU_PORT(x) ((x) << 4) +#define CPU_MASK (0xf << 4) /* Registers for address table access */ #define MT7530_ATA1 0x74 @@ -430,6 +438,7 @@ struct mt7530_priv { struct regulator *core_pwr; struct regulator *io_pwr; struct gpio_desc *reset; + unsigned int id; bool mcm; struct mt7530_port ports[MT7530_NUM_PORTS]; diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 2caa8c8b4b55..1bfc5ff8d81d 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -664,7 +664,7 @@ int mv88e6390_serdes_irq_setup(struct mv88e6xxx_chip *chip, int port) if (port < 9) return 0; - return mv88e6390_serdes_irq_setup(chip, port); + return mv88e6390x_serdes_irq_setup(chip, port); } void mv88e6390x_serdes_irq_free(struct mv88e6xxx_chip *chip, int port) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 7e97e620bd44..a4b6cda38016 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -420,7 +420,7 @@ qca8k_mib_init(struct qca8k_priv *priv) static int qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode) { - u32 reg; + u32 reg, val; switch (port) { case 0: @@ -439,17 +439,9 @@ qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode) */ switch (mode) { case PHY_INTERFACE_MODE_RGMII: - qca8k_write(priv, reg, - QCA8K_PORT_PAD_RGMII_EN | - QCA8K_PORT_PAD_RGMII_TX_DELAY(3) | - QCA8K_PORT_PAD_RGMII_RX_DELAY(3)); - - /* According to the datasheet, RGMII delay is enabled through - * PORT5_PAD_CTRL for all ports, rather than individual port - * registers - */ - qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL, - QCA8K_PORT_PAD_RGMII_RX_DELAY_EN); + /* RGMII mode means no delay so don't enable the delay */ + val = QCA8K_PORT_PAD_RGMII_EN; + qca8k_write(priv, reg, val); break; case PHY_INTERFACE_MODE_SGMII: qca8k_write(priv, reg, QCA8K_PORT_PAD_SGMII_EN); diff --git a/drivers/net/dsa/realtek-smi.c b/drivers/net/dsa/realtek-smi.c index b4b839a1d095..ad41ec63cc9f 100644 --- a/drivers/net/dsa/realtek-smi.c +++ b/drivers/net/dsa/realtek-smi.c @@ -347,16 +347,17 @@ int realtek_smi_setup_mdio(struct realtek_smi *smi) struct device_node *mdio_np; int ret; - mdio_np = of_find_compatible_node(smi->dev->of_node, NULL, - "realtek,smi-mdio"); + mdio_np = of_get_compatible_child(smi->dev->of_node, "realtek,smi-mdio"); if (!mdio_np) { dev_err(smi->dev, "no MDIO bus node\n"); return -ENODEV; } smi->slave_mii_bus = devm_mdiobus_alloc(smi->dev); - if (!smi->slave_mii_bus) - return -ENOMEM; + if (!smi->slave_mii_bus) { + ret = -ENOMEM; + goto err_put_node; + } smi->slave_mii_bus->priv = smi; smi->slave_mii_bus->name = "SMI slave MII"; smi->slave_mii_bus->read = realtek_smi_mdio_read; @@ -371,10 +372,15 @@ int realtek_smi_setup_mdio(struct realtek_smi *smi) if (ret) { dev_err(smi->dev, "unable to register MDIO bus %s\n", smi->slave_mii_bus->id); - of_node_put(mdio_np); + goto err_put_node; } return 0; + +err_put_node: + of_node_put(mdio_np); + + return ret; } static int realtek_smi_probe(struct platform_device *pdev) @@ -457,6 +463,8 @@ static int realtek_smi_remove(struct platform_device *pdev) struct realtek_smi *smi = dev_get_drvdata(&pdev->dev); dsa_unregister_switch(smi->ds); + if (smi->slave_mii_bus) + of_node_put(smi->slave_mii_bus->dev.of_node); gpiod_set_value(smi->reset, 1); return 0; diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index b223769d6a5e..3da97996bdf3 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -1266,12 +1266,14 @@ el3_up(struct net_device *dev) pr_cont("Forcing 3c5x9b full-duplex mode"); break; } + /* fall through */ case 8: /* set full-duplex mode based on eeprom config setting */ if ((sw_info & 0x000f) && (sw_info & 0x8000)) { pr_cont("Setting 3c5x9b full-duplex mode (from EEPROM configuration bit)"); break; } + /* fall through */ default: /* xcvr=(0 || 4) OR user has an old 3c5x9 non "B" model */ pr_cont("Setting 3c5x9/3c5x9B half-duplex mode"); diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index 4f11f98347ed..1827ef1f6d55 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -2059,7 +2059,7 @@ static inline void ace_tx_int(struct net_device *dev, if (skb) { dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); info->skb = NULL; } diff --git a/drivers/net/ethernet/altera/altera_msgdma.c b/drivers/net/ethernet/altera/altera_msgdma.c index 0fb986ba3290..0ae723f75341 100644 --- a/drivers/net/ethernet/altera/altera_msgdma.c +++ b/drivers/net/ethernet/altera/altera_msgdma.c @@ -145,7 +145,8 @@ u32 msgdma_tx_completions(struct altera_tse_private *priv) & 0xffff; if (inuse) { /* Tx FIFO is not empty */ - ready = priv->tx_prod - priv->tx_cons - inuse - 1; + ready = max_t(int, + priv->tx_prod - priv->tx_cons - inuse - 1, 0); } else { /* Check for buffered last packet */ status = csrrd32(priv->tx_dma_csr, msgdma_csroffs(status)); diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 02921d877c08..aa1d1f5339d2 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -714,8 +714,10 @@ static struct phy_device *connect_local_phy(struct net_device *dev) phydev = phy_connect(dev, phy_id_fmt, &altera_tse_adjust_link, priv->phy_iface); - if (IS_ERR(phydev)) + if (IS_ERR(phydev)) { netdev_err(dev, "Could not attach to PHY\n"); + phydev = NULL; + } } else { int ret; diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index a90080f12e67..145fe71fd155 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -435,7 +435,7 @@ static int amd8111e_restart(struct net_device *dev) int i,reg_val; /* stop the chip */ - writel(RUN, mmio + CMD0); + writel(RUN, mmio + CMD0); if(amd8111e_init_ring(dev)) return -ENOMEM; @@ -666,7 +666,7 @@ static int amd8111e_tx(struct net_device *dev) pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[tx_index], lp->tx_skbuff[tx_index]->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq (lp->tx_skbuff[tx_index]); + dev_consume_skb_irq(lp->tx_skbuff[tx_index]); lp->tx_skbuff[tx_index] = NULL; lp->tx_dma_addr[tx_index] = 0; } @@ -1720,7 +1720,7 @@ static void amd8111e_config_ipg(struct timer_list *t) writew((u32)tmp_ipg, mmio + IPG); writew((u32)(tmp_ipg - IFS1_DELTA), mmio + IFS1); } - mod_timer(&lp->ipg_data.ipg_timer, jiffies + IPG_CONVERGE_JIFFIES); + mod_timer(&lp->ipg_data.ipg_timer, jiffies + IPG_CONVERGE_JIFFIES); return; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index d272dc6984ac..b40d4377cc71 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -431,8 +431,6 @@ #define MAC_MDIOSCAR_PA_WIDTH 5 #define MAC_MDIOSCAR_RA_INDEX 0 #define MAC_MDIOSCAR_RA_WIDTH 16 -#define MAC_MDIOSCAR_REG_INDEX 0 -#define MAC_MDIOSCAR_REG_WIDTH 21 #define MAC_MDIOSCCDR_BUSY_INDEX 22 #define MAC_MDIOSCCDR_BUSY_WIDTH 1 #define MAC_MDIOSCCDR_CMD_INDEX 16 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 1e929a1e4ca7..4666084eda16 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1284,6 +1284,20 @@ static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad, } } +static unsigned int xgbe_create_mdio_sca(int port, int reg) +{ + unsigned int mdio_sca, da; + + da = (reg & MII_ADDR_C45) ? reg >> 16 : 0; + + mdio_sca = 0; + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, RA, reg); + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, PA, port); + XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, da); + + return mdio_sca; +} + static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, int reg, u16 val) { @@ -1291,9 +1305,7 @@ static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, reinit_completion(&pdata->mdio_complete); - mdio_sca = 0; - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); + mdio_sca = xgbe_create_mdio_sca(addr, reg); XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); mdio_sccd = 0; @@ -1317,9 +1329,7 @@ static int xgbe_read_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, reinit_completion(&pdata->mdio_complete); - mdio_sca = 0; - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); - XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); + mdio_sca = xgbe_create_mdio_sca(addr, reg); XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); mdio_sccd = 0; diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index 6a8e2567f2bd..4d3855ceb500 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -777,7 +777,7 @@ static irqreturn_t bmac_txdma_intr(int irq, void *dev_id) if (bp->tx_bufs[bp->tx_empty]) { ++dev->stats.tx_packets; - dev_kfree_skb_irq(bp->tx_bufs[bp->tx_empty]); + dev_consume_skb_irq(bp->tx_bufs[bp->tx_empty]); } bp->tx_bufs[bp->tx_empty] = NULL; bp->tx_fullup = 0; diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index bb41becb6609..c44e95255aa1 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -909,7 +909,7 @@ static netdev_tx_t atl2_xmit_frame(struct sk_buff *skb, (adapter->txd_write_ptr >> 2)); mmiowb(); - dev_kfree_skb_any(skb); + dev_consume_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index f44808959ff3..97ab0dd25552 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -638,7 +638,7 @@ static void b44_tx(struct b44 *bp) bytes_compl += skb->len; pkts_compl++; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); } netdev_completed_queue(bp->dev, pkts_compl, bytes_compl); @@ -1012,7 +1012,7 @@ static netdev_tx_t b44_start_xmit(struct sk_buff *skb, struct net_device *dev) } skb_copy_from_linear_data(skb, skb_put(bounce_skb, len), len); - dev_kfree_skb_any(skb); + dev_consume_skb_any(skb); skb = bounce_skb; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 03d131f777bc..6026b53137aa 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -32,7 +32,7 @@ * (you will need to reboot afterwards) */ /* #define BNX2X_STOP_ON_ERROR */ -#define DRV_MODULE_VERSION "1.712.30-0" +#define DRV_MODULE_VERSION "1.713.36-0" #define DRV_MODULE_RELDATE "2014/02/10" #define BNX2X_BC_VER 0x040200 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index 29738dfa878c..d581d0ae6584 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -6335,7 +6335,7 @@ int bnx2x_set_led(struct link_params *params, */ if (!vars->link_up) break; - /* else: fall through */ + /* fall through */ case LED_MODE_ON: if (((params->phy[EXT_PHY1].type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727) || diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index c835f6c7ecd0..c97b642e6537 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2230,7 +2230,7 @@ int bnx2x_vf_free(struct bnx2x *bp, struct bnx2x_virtf *vf) rc = bnx2x_vf_close(bp, vf); if (rc) goto op_err; - /* Fallthrough to release resources */ + /* Fall through - to release resources */ case VF_ACQUIRED: DP(BNX2X_MSG_IOV, "about to free resources\n"); bnx2x_vf_free_resc(bp, vf); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index a451796deefe..5c886a700bcc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1609,6 +1609,7 @@ struct bnxt { /* devlink interface and vf-rep structs */ struct devlink *dl; + struct devlink_port dl_port; enum devlink_eswitch_mode eswitch_mode; struct bnxt_vf_rep **vf_reps; /* array of vf-rep ptrs */ u16 *cfa_code_map; /* cfa_code -> vf_idx map */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 7f56032e44ac..a6abfa4fb168 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -37,6 +37,8 @@ static const struct bnxt_dl_nvm_param nvm_params[] = { NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7}, {BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK, NVM_OFF_DIS_GRE_VER_CHECK, BNXT_NVM_SHARED_CFG, 1}, + + {DEVLINK_PARAM_GENERIC_ID_WOL, NVM_OFF_WOL, BNXT_NVM_PORT_CFG, 1}, }; static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, @@ -70,7 +72,8 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, bytesize = roundup(nvm_param.num_bits, BITS_PER_BYTE) / BITS_PER_BYTE; switch (bytesize) { case 1: - if (nvm_param.num_bits == 1) + if (nvm_param.num_bits == 1 && + nvm_param.id != DEVLINK_PARAM_GENERIC_ID_WOL) buf = &val->vbool; else buf = &val->vu8; @@ -164,6 +167,17 @@ static int bnxt_dl_msix_validate(struct devlink *dl, u32 id, return 0; } +static int bnxt_dl_wol_validate(struct devlink *dl, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if (val.vu8 && val.vu8 != DEVLINK_PARAM_WAKE_MAGIC) { + NL_SET_ERR_MSG_MOD(extack, "WOL type is not supported"); + return -EINVAL; + } + return 0; +} + static const struct devlink_param bnxt_dl_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT), @@ -188,6 +202,12 @@ static const struct devlink_param bnxt_dl_params[] = { NULL), }; +static const struct devlink_param bnxt_dl_port_params[] = { + DEVLINK_PARAM_GENERIC(WOL, BIT(DEVLINK_PARAM_CMODE_PERMANENT), + bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set, + bnxt_dl_wol_validate), +}; + int bnxt_dl_register(struct bnxt *bp) { struct devlink *dl; @@ -225,8 +245,26 @@ int bnxt_dl_register(struct bnxt *bp) goto err_dl_unreg; } + rc = devlink_port_register(dl, &bp->dl_port, bp->pf.port_id); + if (rc) { + netdev_err(bp->dev, "devlink_port_register failed"); + goto err_dl_param_unreg; + } + devlink_port_type_eth_set(&bp->dl_port, bp->dev); + + rc = devlink_port_params_register(&bp->dl_port, bnxt_dl_port_params, + ARRAY_SIZE(bnxt_dl_port_params)); + if (rc) { + netdev_err(bp->dev, "devlink_port_params_register failed"); + goto err_dl_port_unreg; + } return 0; +err_dl_port_unreg: + devlink_port_unregister(&bp->dl_port); +err_dl_param_unreg: + devlink_params_unregister(dl, bnxt_dl_params, + ARRAY_SIZE(bnxt_dl_params)); err_dl_unreg: devlink_unregister(dl); err_dl_free: @@ -242,6 +280,9 @@ void bnxt_dl_unregister(struct bnxt *bp) if (!dl) return; + devlink_port_params_unregister(&bp->dl_port, bnxt_dl_port_params, + ARRAY_SIZE(bnxt_dl_port_params)); + devlink_port_unregister(&bp->dl_port); devlink_params_unregister(dl, bnxt_dl_params, ARRAY_SIZE(bnxt_dl_params)); devlink_unregister(dl); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h index 5b6b2c7d97cf..da065ca84cc7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -35,6 +35,7 @@ static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl) #define NVM_OFF_MSIX_VEC_PER_PF_MAX 108 #define NVM_OFF_MSIX_VEC_PER_PF_MIN 114 +#define NVM_OFF_WOL 152 #define NVM_OFF_IGNORE_ARI 164 #define NVM_OFF_DIS_GRE_VER_CHECK 171 #define NVM_OFF_ENABLE_SRIOV 401 diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index b1627dd5f2fd..328373e0578f 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -721,7 +721,7 @@ static int tg3_ape_lock(struct tg3 *tp, int locknum) case TG3_APE_LOCK_GPIO: if (tg3_asic_rev(tp) == ASIC_REV_5761) return 0; - /* else: fall through */ + /* fall through */ case TG3_APE_LOCK_GRC: case TG3_APE_LOCK_MEM: if (!tp->pci_fn) @@ -782,7 +782,7 @@ static void tg3_ape_unlock(struct tg3 *tp, int locknum) case TG3_APE_LOCK_GPIO: if (tg3_asic_rev(tp) == ASIC_REV_5761) return; - /* else: fall through */ + /* fall through */ case TG3_APE_LOCK_GRC: case TG3_APE_LOCK_MEM: if (!tp->pci_fn) diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index a36e38676640..cbc9175d74c5 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -788,9 +788,8 @@ bfa_iocpf_sm_enabling(struct bfa_iocpf *iocpf, enum iocpf_event event) case IOCPF_E_INITFAIL: del_timer(&ioc->iocpf_timer); - /* - * !!! fall through !!! - */ + /* fall through */ + case IOCPF_E_TIMEOUT: bfa_nw_ioc_hw_sem_release(ioc); if (event == IOCPF_E_TIMEOUT) @@ -858,9 +857,7 @@ bfa_iocpf_sm_disabling(struct bfa_iocpf *iocpf, enum iocpf_event event) case IOCPF_E_FAIL: del_timer(&ioc->iocpf_timer); - /* - * !!! fall through !!! - */ + /* fall through*/ case IOCPF_E_TIMEOUT: bfa_ioc_set_cur_ioc_fwstate(ioc, BFI_IOC_FAIL); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 3d45f4c92cf6..9bbaad9f3d63 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -643,6 +643,7 @@ #define MACB_CAPS_JUMBO 0x00000020 #define MACB_CAPS_GEM_HAS_PTP 0x00000040 #define MACB_CAPS_BD_RD_PREFETCH 0x00000080 +#define MACB_CAPS_NEEDS_RSTONUBR 0x00000100 #define MACB_CAPS_FIFO_MODE 0x10000000 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 #define MACB_CAPS_SG_DISABLED 0x40000000 @@ -1214,6 +1215,8 @@ struct macb { int rx_bd_rd_prefetch; int tx_bd_rd_prefetch; + + u32 rx_intr_mask; }; #ifdef CONFIG_MACB_USE_HWSTAMP diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 66cc7927061a..2b2882615e8b 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -56,8 +56,7 @@ /* level of occupied TX descriptors under which we wake up TX process */ #define MACB_TX_WAKEUP_THRESH(bp) (3 * (bp)->tx_ring_size / 4) -#define MACB_RX_INT_FLAGS (MACB_BIT(RCOMP) | MACB_BIT(RXUBR) \ - | MACB_BIT(ISR_ROVR)) +#define MACB_RX_INT_FLAGS (MACB_BIT(RCOMP) | MACB_BIT(ISR_ROVR)) #define MACB_TX_ERR_FLAGS (MACB_BIT(ISR_TUND) \ | MACB_BIT(ISR_RLE) \ | MACB_BIT(TXERR)) @@ -1270,7 +1269,7 @@ static int macb_poll(struct napi_struct *napi, int budget) queue_writel(queue, ISR, MACB_BIT(RCOMP)); napi_reschedule(napi); } else { - queue_writel(queue, IER, MACB_RX_INT_FLAGS); + queue_writel(queue, IER, bp->rx_intr_mask); } } @@ -1288,7 +1287,7 @@ static void macb_hresp_error_task(unsigned long data) u32 ctrl; for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - queue_writel(queue, IDR, MACB_RX_INT_FLAGS | + queue_writel(queue, IDR, bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); } @@ -1318,7 +1317,7 @@ static void macb_hresp_error_task(unsigned long data) /* Enable interrupts */ queue_writel(queue, IER, - MACB_RX_INT_FLAGS | + bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); } @@ -1372,14 +1371,14 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) (unsigned int)(queue - bp->queues), (unsigned long)status); - if (status & MACB_RX_INT_FLAGS) { + if (status & bp->rx_intr_mask) { /* There's no point taking any more interrupts * until we have processed the buffers. The * scheduling call may fail if the poll routine * is already scheduled, so disable interrupts * now. */ - queue_writel(queue, IDR, MACB_RX_INT_FLAGS); + queue_writel(queue, IDR, bp->rx_intr_mask); if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(RCOMP)); @@ -1412,8 +1411,9 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) /* There is a hardware issue under heavy load where DMA can * stop, this causes endless "used buffer descriptor read" * interrupts but it can be cleared by re-enabling RX. See - * the at91 manual, section 41.3.1 or the Zynq manual - * section 16.7.4 for details. + * the at91rm9200 manual, section 41.3.1 or the Zynq manual + * section 16.7.4 for details. RXUBR is only enabled for + * these two versions. */ if (status & MACB_BIT(RXUBR)) { ctrl = macb_readl(bp, NCR); @@ -2259,7 +2259,7 @@ static void macb_init_hw(struct macb *bp) /* Enable interrupts */ queue_writel(queue, IER, - MACB_RX_INT_FLAGS | + bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); } @@ -3907,6 +3907,7 @@ static const struct macb_config sama5d4_config = { }; static const struct macb_config emac_config = { + .caps = MACB_CAPS_NEEDS_RSTONUBR, .clk_init = at91ether_clk_init, .init = at91ether_init, }; @@ -3928,7 +3929,8 @@ static const struct macb_config zynqmp_config = { }; static const struct macb_config zynq_config = { - .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF, + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF | + MACB_CAPS_NEEDS_RSTONUBR, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, @@ -4083,6 +4085,10 @@ static int macb_probe(struct platform_device *pdev) macb_dma_desc_get_size(bp); } + bp->rx_intr_mask = MACB_RX_INT_FLAGS; + if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR) + bp->rx_intr_mask |= MACB_BIT(RXUBR); + mac = of_get_mac_address(np); if (mac) { ether_addr_copy(bp->dev->dev_addr, mac); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 825a28e5b544..e21bf3724611 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -661,7 +661,8 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)), (((rh->r_dh.encap_on) && (rh->r_dh.csum_verified & CNNIC_TUN_CSUM_VERIFIED)) || (!(rh->r_dh.encap_on) && - (rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED)))) + ((rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED) == + CNNIC_CSUM_VERIFIED)))) /* checksum has already been verified */ skb->ip_summed = CHECKSUM_UNNECESSARY; else diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c index 5701272aa7f7..ce28820c57c9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c @@ -289,8 +289,7 @@ struct clip_tbl *t4_init_clip_tbl(unsigned int clipt_start, if (clipt_size < CLIPT_MIN_HASH_BUCKETS) return NULL; - ctbl = kvzalloc(sizeof(*ctbl) + - clipt_size*sizeof(struct list_head), GFP_KERNEL); + ctbl = kvzalloc(struct_size(ctbl, hash_list, clipt_size), GFP_KERNEL); if (!ctbl) return NULL; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index d07230c892a5..796043544fc3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -446,8 +446,10 @@ static void fw_caps_to_lmm(enum fw_port_type port_type, unsigned long *link_mode_mask) { #define SET_LMM(__lmm_name) \ - __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \ - link_mode_mask) + do { \ + __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \ + link_mode_mask); \ + } while (0) #define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \ do { \ @@ -541,7 +543,7 @@ static void fw_caps_to_lmm(enum fw_port_type port_type, case FW_PORT_TYPE_CR4_QSFP: SET_LMM(FIBRE); FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full); - FW_CAPS_TO_LMM(SPEED_10G, 10000baseSR_Full); + FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full); FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full); FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full); FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full); @@ -552,6 +554,13 @@ static void fw_caps_to_lmm(enum fw_port_type port_type, break; } + if (fw_caps & FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M)) { + FW_CAPS_TO_LMM(FEC_RS, FEC_RS); + FW_CAPS_TO_LMM(FEC_BASER_RS, FEC_BASER); + } else { + SET_LMM(FEC_NONE); + } + FW_CAPS_TO_LMM(ANEG, Autoneg); FW_CAPS_TO_LMM(802_3_PAUSE, Pause); FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause); @@ -679,18 +688,15 @@ static int set_link_ksettings(struct net_device *dev, base->autoneg == AUTONEG_DISABLE) { fw_caps = speed_to_fw_caps(base->speed); - /* Must only specify a single speed which must be supported - * as part of the Physical Port Capabilities. - */ - if ((fw_caps & (fw_caps - 1)) != 0 || - !(lc->pcaps & fw_caps)) + /* Speed must be supported by Physical Port Capabilities. */ + if (!(lc->pcaps & fw_caps)) return -EINVAL; lc->speed_caps = fw_caps; lc->acaps = fw_caps; } else { fw_caps = - lmm_to_fw_caps(link_ksettings->link_modes.advertising); + lmm_to_fw_caps(link_ksettings->link_modes.advertising); if (!(lc->pcaps & fw_caps)) return -EINVAL; lc->speed_caps = 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 6ba9099ca7fe..73fc2479b4f4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5653,7 +5653,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_GSO_UDP_TUNNEL | NETIF_F_TSO | NETIF_F_TSO6; - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_HW_TLS_RECORD; } if (highdma) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index c7d2b4dc7568..02fc63fa7f25 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -444,8 +444,7 @@ struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap) if (!max_tids) return NULL; - t = kvzalloc(sizeof(*t) + - (max_tids * sizeof(struct cxgb4_link)), GFP_KERNEL); + t = kvzalloc(struct_size(t, table, max_tids), GFP_KERNEL); if (!t) return NULL; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 2b03f6187a24..c5e5466ee38b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -4105,6 +4105,9 @@ static inline fw_port_cap32_t cc_to_fwcap_fec(enum cc_fec cc_fec) * @mbox: the Firmware Mailbox to use * @port: the Port ID * @lc: the Port's Link Configuration + * @sleep_ok: if true we may sleep while awaiting command completion + * @timeout: time to wait for command to finish before timing out + * (negative implies @sleep_ok=false) * * Set up a port's MAC and PHY according to a desired link configuration. * - If the PHY can auto-negotiate first decide what to advertise, then @@ -4124,6 +4127,7 @@ int t4_link_l1cfg_core(struct adapter *adapter, unsigned int mbox, int ret; fw_mdi = (FW_PORT_CAP32_MDI_V(FW_PORT_CAP32_MDI_AUTO) & lc->pcaps); + /* Convert driver coding of Pause Frame Flow Control settings into the * Firmware's API. */ @@ -4143,8 +4147,13 @@ int t4_link_l1cfg_core(struct adapter *adapter, unsigned int mbox, fw_fec = cc_to_fwcap_fec(cc_fec); /* Figure out what our Requested Port Capabilities are going to be. + * Note parallel structure in t4_handle_get_port_info() and + * init_link_config(). */ if (!(lc->pcaps & FW_PORT_CAP32_ANEG)) { + if (lc->autoneg == AUTONEG_ENABLE) + return -EINVAL; + rcap = lc->acaps | fw_fc | fw_fec; lc->fc = lc->requested_fc & ~PAUSE_AUTONEG; lc->fec = cc_fec; @@ -4156,7 +4165,11 @@ int t4_link_l1cfg_core(struct adapter *adapter, unsigned int mbox, rcap = lc->acaps | fw_fc | fw_fec | fw_mdi; } - /* Note that older Firmware doesn't have FW_PORT_CAP32_FORCE_PAUSE, so + /* Some Requested Port Capabilities are trivially wrong if they exceed + * the Physical Port Capabilities. We can check that here and provide + * moderately useful feedback in the system log. + * + * Note that older Firmware doesn't have FW_PORT_CAP32_FORCE_PAUSE, so * we need to exclude this from this check in order to maintain * compatibility ... */ @@ -4185,6 +4198,13 @@ int t4_link_l1cfg_core(struct adapter *adapter, unsigned int mbox, ret = t4_wr_mbox_meat_timeout(adapter, mbox, &cmd, sizeof(cmd), NULL, sleep_ok, timeout); + + /* Unfortunately, even if the Requested Port Capabilities "fit" within + * the Physical Port Capabilities, some combinations of features may + * still not be leagal. For example, 40Gb/s and Reed-Solomon Forward + * Error Correction. So if the Firmware rejects the L1 Configure + * request, flag that here. + */ if (ret) { dev_err(adapter->pdev_dev, "Requested Port Capabilities %#x rejected, error %d\n", @@ -8461,6 +8481,10 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) fc = fwcap_to_cc_pause(linkattr); speed = fwcap_to_speed(linkattr); + /* Reset state for communicating new Transceiver Module status and + * whether the OS-dependent layer wants us to redo the current + * "sticky" L1 Configure Link Parameters. + */ lc->new_module = false; lc->redo_l1cfg = false; @@ -8497,9 +8521,15 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) */ pi->port_type = port_type; + /* Record new Module Type information. + */ pi->mod_type = mod_type; + /* Let the OS-dependent layer know if we have a new + * Transceiver Module inserted. + */ lc->new_module = t4_is_inserted_mod_type(mod_type); + t4_os_portmod_changed(adapter, pi->port_id); } @@ -8507,8 +8537,10 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) fc != lc->fc || fec != lc->fec) { /* something changed */ if (!link_ok && lc->link_ok) { lc->link_down_rc = linkdnrc; - dev_warn(adapter->pdev_dev, "Port %d link down, reason: %s\n", - pi->tx_chan, t4_link_down_rc_str(linkdnrc)); + dev_warn_ratelimited(adapter->pdev_dev, + "Port %d link down, reason: %s\n", + pi->tx_chan, + t4_link_down_rc_str(linkdnrc)); } lc->link_ok = link_ok; lc->speed = speed; @@ -8518,6 +8550,11 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) lc->lpacaps = lpacaps; lc->acaps = acaps & ADVERT_MASK; + /* If we're not physically capable of Auto-Negotiation, note + * this as Auto-Negotiation disabled. Otherwise, we track + * what Auto-Negotiation settings we have. Note parallel + * structure in t4_link_l1cfg_core() and init_link_config(). + */ if (!(lc->acaps & FW_PORT_CAP32_ANEG)) { lc->autoneg = AUTONEG_DISABLE; } else if (lc->acaps & FW_PORT_CAP32_ANEG) { @@ -8535,6 +8572,10 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) t4_os_link_changed(adapter, pi->port_id, link_ok); } + /* If we have a new Transceiver Module and the OS-dependent code has + * told us that it wants us to redo whatever "sticky" L1 Configuration + * Link Parameters are set, do that now. + */ if (lc->new_module && lc->redo_l1cfg) { struct link_config old_lc; int ret; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 2fab87e86561..1fa24af8099f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -791,6 +791,13 @@ static int cxgb4vf_open(struct net_device *dev) return err; } + /* It's possible that the basic port information could have + * changed since we first read it. + */ + err = t4vf_update_port_info(pi); + if (err < 0) + return err; + /* * Note that this interface is up and start everything up ... */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 5b8c08cf523f..84dff74ca9cd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -2005,8 +2005,10 @@ static void t4vf_handle_get_port_info(struct port_info *pi, fc != lc->fc || fec != lc->fec) { /* something changed */ if (!link_ok && lc->link_ok) { lc->link_down_rc = linkdnrc; - dev_warn(adapter->pdev_dev, "Port %d link down, reason: %s\n", - pi->port_id, t4vf_link_down_rc_str(linkdnrc)); + dev_warn_ratelimited(adapter->pdev_dev, + "Port %d link down, reason: %s\n", + pi->port_id, + t4vf_link_down_rc_str(linkdnrc)); } lc->link_ok = link_ok; lc->speed = speed; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index d5026909dec5..3c7c04406a2b 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1270,10 +1270,6 @@ static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo) #define is_arp_allowed_on_bmc(adapter, skb) \ (is_arp(skb) && is_arp_filt_enabled(adapter)) -#define is_broadcast_packet(eh, adapter) \ - (is_multicast_ether_addr(eh->h_dest) && \ - !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast)) - #define is_arp(skb) (skb->protocol == htons(ETH_P_ARP)) #define is_arp_filt_enabled(adapter) \ diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index d3a62bc1f1c6..71793e03c3c8 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -97,5 +97,6 @@ config GIANFAR source "drivers/net/ethernet/freescale/dpaa/Kconfig" source "drivers/net/ethernet/freescale/dpaa2/Kconfig" +source "drivers/net/ethernet/freescale/enetc/Kconfig" endif # NET_VENDOR_FREESCALE diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile index 3b4ff08e3841..6a93293d31e0 100644 --- a/drivers/net/ethernet/freescale/Makefile +++ b/drivers/net/ethernet/freescale/Makefile @@ -23,3 +23,6 @@ obj-$(CONFIG_FSL_FMAN) += fman/ obj-$(CONFIG_FSL_DPAA_ETH) += dpaa/ obj-$(CONFIG_FSL_DPAA2_ETH) += dpaa2/ + +obj-$(CONFIG_FSL_ENETC) += enetc/ +obj-$(CONFIG_FSL_ENETC_VF) += enetc/ diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index f53090cde041..dfebc30c4841 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2051,6 +2051,7 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) bool nonlinear = skb_is_nonlinear(skb); struct rtnl_link_stats64 *percpu_stats; struct dpaa_percpu_priv *percpu_priv; + struct netdev_queue *txq; struct dpaa_priv *priv; struct qm_fd fd; int offset = 0; @@ -2100,6 +2101,11 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) if (unlikely(err < 0)) goto skb_to_fd_failed; + txq = netdev_get_tx_queue(net_dev, queue_mapping); + + /* LLTX requires to do our own update of trans_start */ + txq->trans_start = jiffies; + if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { fd.cmd |= cpu_to_be32(FM_FD_CMD_UPD); skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig index 809a155eb193..f6d244c663fd 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Kconfig +++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig @@ -9,8 +9,9 @@ config FSL_DPAA2_ETH config FSL_DPAA2_PTP_CLOCK tristate "Freescale DPAA2 PTP Clock" - depends on FSL_DPAA2_ETH && POSIX_TIMERS - select PTP_1588_CLOCK + depends on FSL_DPAA2_ETH + imply PTP_1588_CLOCK + default y help This driver adds support for using the DPAA2 1588 timer module as a PTP clock. diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile index 2f424e0a8225..d1e78cdd512f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Makefile +++ b/drivers/net/ethernet/freescale/dpaa2/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_FSL_DPAA2_ETH) += fsl-dpaa2-eth.o obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK) += fsl-dpaa2-ptp.o fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o +fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o # Needed by the tracing framework diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c new file mode 100644 index 000000000000..a027f4a9d0cc --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* Copyright 2015 Freescale Semiconductor Inc. + * Copyright 2018-2019 NXP + */ +#include +#include +#include "dpaa2-eth.h" +#include "dpaa2-eth-debugfs.h" + +#define DPAA2_ETH_DBG_ROOT "dpaa2-eth" + +static struct dentry *dpaa2_dbg_root; + +static int dpaa2_dbg_cpu_show(struct seq_file *file, void *offset) +{ + struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private; + struct rtnl_link_stats64 *stats; + struct dpaa2_eth_drv_stats *extras; + int i; + + seq_printf(file, "Per-CPU stats for %s\n", priv->net_dev->name); + seq_printf(file, "%s%16s%16s%16s%16s%16s%16s%16s%16s%16s\n", + "CPU", "Rx", "Rx Err", "Rx SG", "Tx", "Tx Err", "Tx conf", + "Tx SG", "Tx realloc", "Enq busy"); + + for_each_online_cpu(i) { + stats = per_cpu_ptr(priv->percpu_stats, i); + extras = per_cpu_ptr(priv->percpu_extras, i); + seq_printf(file, "%3d%16llu%16llu%16llu%16llu%16llu%16llu%16llu%16llu%16llu\n", + i, + stats->rx_packets, + stats->rx_errors, + extras->rx_sg_frames, + stats->tx_packets, + stats->tx_errors, + extras->tx_conf_frames, + extras->tx_sg_frames, + extras->tx_reallocs, + extras->tx_portal_busy); + } + + return 0; +} + +static int dpaa2_dbg_cpu_open(struct inode *inode, struct file *file) +{ + int err; + struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private; + + err = single_open(file, dpaa2_dbg_cpu_show, priv); + if (err < 0) + netdev_err(priv->net_dev, "single_open() failed\n"); + + return err; +} + +static const struct file_operations dpaa2_dbg_cpu_ops = { + .open = dpaa2_dbg_cpu_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static char *fq_type_to_str(struct dpaa2_eth_fq *fq) +{ + switch (fq->type) { + case DPAA2_RX_FQ: + return "Rx"; + case DPAA2_TX_CONF_FQ: + return "Tx conf"; + default: + return "N/A"; + } +} + +static int dpaa2_dbg_fqs_show(struct seq_file *file, void *offset) +{ + struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private; + struct dpaa2_eth_fq *fq; + u32 fcnt, bcnt; + int i, err; + + seq_printf(file, "FQ stats for %s:\n", priv->net_dev->name); + seq_printf(file, "%s%16s%16s%16s%16s\n", + "VFQID", "CPU", "Type", "Frames", "Pending frames"); + + for (i = 0; i < priv->num_fqs; i++) { + fq = &priv->fq[i]; + err = dpaa2_io_query_fq_count(NULL, fq->fqid, &fcnt, &bcnt); + if (err) + fcnt = 0; + + seq_printf(file, "%5d%16d%16s%16llu%16u\n", + fq->fqid, + fq->target_cpu, + fq_type_to_str(fq), + fq->stats.frames, + fcnt); + } + + return 0; +} + +static int dpaa2_dbg_fqs_open(struct inode *inode, struct file *file) +{ + int err; + struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private; + + err = single_open(file, dpaa2_dbg_fqs_show, priv); + if (err < 0) + netdev_err(priv->net_dev, "single_open() failed\n"); + + return err; +} + +static const struct file_operations dpaa2_dbg_fq_ops = { + .open = dpaa2_dbg_fqs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int dpaa2_dbg_ch_show(struct seq_file *file, void *offset) +{ + struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)file->private; + struct dpaa2_eth_channel *ch; + int i; + + seq_printf(file, "Channel stats for %s:\n", priv->net_dev->name); + seq_printf(file, "%s%16s%16s%16s%16s\n", + "CHID", "CPU", "Deq busy", "CDANs", "Buf count"); + + for (i = 0; i < priv->num_channels; i++) { + ch = priv->channel[i]; + seq_printf(file, "%4d%16d%16llu%16llu%16d\n", + ch->ch_id, + ch->nctx.desired_cpu, + ch->stats.dequeue_portal_busy, + ch->stats.cdan, + ch->buf_count); + } + + return 0; +} + +static int dpaa2_dbg_ch_open(struct inode *inode, struct file *file) +{ + int err; + struct dpaa2_eth_priv *priv = (struct dpaa2_eth_priv *)inode->i_private; + + err = single_open(file, dpaa2_dbg_ch_show, priv); + if (err < 0) + netdev_err(priv->net_dev, "single_open() failed\n"); + + return err; +} + +static const struct file_operations dpaa2_dbg_ch_ops = { + .open = dpaa2_dbg_ch_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void dpaa2_dbg_add(struct dpaa2_eth_priv *priv) +{ + if (!dpaa2_dbg_root) + return; + + /* Create a directory for the interface */ + priv->dbg.dir = debugfs_create_dir(priv->net_dev->name, + dpaa2_dbg_root); + if (!priv->dbg.dir) { + netdev_err(priv->net_dev, "debugfs_create_dir() failed\n"); + return; + } + + /* per-cpu stats file */ + priv->dbg.cpu_stats = debugfs_create_file("cpu_stats", 0444, + priv->dbg.dir, priv, + &dpaa2_dbg_cpu_ops); + if (!priv->dbg.cpu_stats) { + netdev_err(priv->net_dev, "debugfs_create_file() failed\n"); + goto err_cpu_stats; + } + + /* per-fq stats file */ + priv->dbg.fq_stats = debugfs_create_file("fq_stats", 0444, + priv->dbg.dir, priv, + &dpaa2_dbg_fq_ops); + if (!priv->dbg.fq_stats) { + netdev_err(priv->net_dev, "debugfs_create_file() failed\n"); + goto err_fq_stats; + } + + /* per-fq stats file */ + priv->dbg.ch_stats = debugfs_create_file("ch_stats", 0444, + priv->dbg.dir, priv, + &dpaa2_dbg_ch_ops); + if (!priv->dbg.fq_stats) { + netdev_err(priv->net_dev, "debugfs_create_file() failed\n"); + goto err_ch_stats; + } + + return; + +err_ch_stats: + debugfs_remove(priv->dbg.fq_stats); +err_fq_stats: + debugfs_remove(priv->dbg.cpu_stats); +err_cpu_stats: + debugfs_remove(priv->dbg.dir); +} + +void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv) +{ + debugfs_remove(priv->dbg.fq_stats); + debugfs_remove(priv->dbg.ch_stats); + debugfs_remove(priv->dbg.cpu_stats); + debugfs_remove(priv->dbg.dir); +} + +void dpaa2_eth_dbg_init(void) +{ + dpaa2_dbg_root = debugfs_create_dir(DPAA2_ETH_DBG_ROOT, NULL); + if (!dpaa2_dbg_root) { + pr_err("DPAA2-ETH: debugfs create failed\n"); + return; + } + + pr_debug("DPAA2-ETH: debugfs created\n"); +} + +void dpaa2_eth_dbg_exit(void) +{ + debugfs_remove(dpaa2_dbg_root); +} diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h new file mode 100644 index 000000000000..4f63de997a26 --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2015 Freescale Semiconductor Inc. + * Copyright 2018-2019 NXP + */ +#ifndef DPAA2_ETH_DEBUGFS_H +#define DPAA2_ETH_DEBUGFS_H + +#include + +struct dpaa2_eth_priv; + +struct dpaa2_debugfs { + struct dentry *dir; + struct dentry *fq_stats; + struct dentry *ch_stats; + struct dentry *cpu_stats; +}; + +#ifdef CONFIG_DEBUG_FS +void dpaa2_eth_dbg_init(void); +void dpaa2_eth_dbg_exit(void); +void dpaa2_dbg_add(struct dpaa2_eth_priv *priv); +void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv); +#else +static inline void dpaa2_eth_dbg_init(void) {} +static inline void dpaa2_eth_dbg_exit(void) {} +static inline void dpaa2_dbg_add(struct dpaa2_eth_priv *priv) {} +static inline void dpaa2_dbg_remove(struct dpaa2_eth_priv *priv) {} +#endif /* CONFIG_DEBUG_FS */ + +#endif /* DPAA2_ETH_DEBUGFS_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index ecf92189b0a4..04925c731f0b 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -3083,6 +3083,10 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) goto err_netdev_reg; } +#ifdef CONFIG_DEBUG_FS + dpaa2_dbg_add(priv); +#endif + dev_info(dev, "Probed interface %s\n", net_dev->name); return 0; @@ -3126,6 +3130,9 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) net_dev = dev_get_drvdata(dev); priv = netdev_priv(net_dev); +#ifdef CONFIG_DEBUG_FS + dpaa2_dbg_remove(priv); +#endif unregister_netdev(net_dev); if (priv->do_link_poll) @@ -3170,4 +3177,25 @@ static struct fsl_mc_driver dpaa2_eth_driver = { .match_id_table = dpaa2_eth_match_id_table }; -module_fsl_mc_driver(dpaa2_eth_driver); +static int __init dpaa2_eth_driver_init(void) +{ + int err; + + dpaa2_eth_dbg_init(); + err = fsl_mc_driver_register(&dpaa2_eth_driver); + if (err) { + dpaa2_eth_dbg_exit(); + return err; + } + + return 0; +} + +static void __exit dpaa2_eth_driver_exit(void) +{ + dpaa2_eth_dbg_exit(); + fsl_mc_driver_unregister(&dpaa2_eth_driver); +} + +module_init(dpaa2_eth_driver_init); +module_exit(dpaa2_eth_driver_exit); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 69c965de192b..31fe486ec25f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -16,6 +16,7 @@ #include "dpni-cmd.h" #include "dpaa2-eth-trace.h" +#include "dpaa2-eth-debugfs.h" #define DPAA2_WRIOP_VERSION(x, y, z) ((x) << 10 | (y) << 5 | (z) << 0) @@ -365,6 +366,9 @@ struct dpaa2_eth_priv { struct dpaa2_eth_cls_rule *cls_rules; u8 rx_cls_enabled; struct bpf_prog *xdp_prog; +#ifdef CONFIG_DEBUG_FS + struct dpaa2_debugfs dbg; +#endif }; #define DPAA2_RXH_SUPPORTED (RXH_L2DA | RXH_VLAN | RXH_L3_PROTO \ @@ -405,6 +409,10 @@ static inline int dpaa2_eth_cmp_dpni_ver(struct dpaa2_eth_priv *priv, #define dpaa2_eth_fs_count(priv) \ ((priv)->dpni_attrs.fs_entries) +/* We have exactly one {Rx, Tx conf} queue per channel */ +#define dpaa2_eth_queue_count(priv) \ + ((priv)->num_channels) + enum dpaa2_eth_rx_dist { DPAA2_ETH_RX_DIST_HASH, DPAA2_ETH_RX_DIST_CLS @@ -447,12 +455,6 @@ static inline unsigned int dpaa2_eth_rx_head_room(struct dpaa2_eth_priv *priv) DPAA2_ETH_RX_HWA_SIZE; } -/* We have exactly one {Rx, Tx conf} queue per channel */ -static int dpaa2_eth_queue_count(struct dpaa2_eth_priv *priv) -{ - return priv->num_channels; -} - int dpaa2_eth_set_hash(struct net_device *net_dev, u64 flags); int dpaa2_eth_cls_key_size(void); int dpaa2_eth_cls_fld_off(int prot, int field); diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig new file mode 100644 index 000000000000..f9dd26fbfc83 --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +config FSL_ENETC + tristate "ENETC PF driver" + depends on PCI && PCI_MSI && (ARCH_LAYERSCAPE || COMPILE_TEST) + help + This driver supports NXP ENETC gigabit ethernet controller PCIe + physical function (PF) devices, managing ENETC Ports at a privileged + level. + + If compiled as module (M), the module name is fsl-enetc. + +config FSL_ENETC_VF + tristate "ENETC VF driver" + depends on PCI && PCI_MSI && (ARCH_LAYERSCAPE || COMPILE_TEST) + help + This driver supports NXP ENETC gigabit ethernet controller PCIe + virtual function (VF) devices enabled by the ENETC PF driver. + + If compiled as module (M), the module name is fsl-enetc-vf. diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile new file mode 100644 index 000000000000..9529b01872ca --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_FSL_ENETC) += fsl-enetc.o +fsl-enetc-$(CONFIG_FSL_ENETC) += enetc.o enetc_cbdr.o enetc_ethtool.o +fsl-enetc-$(CONFIG_PCI_IOV) += enetc_msg.o +fsl-enetc-objs := enetc_pf.o $(fsl-enetc-y) + +obj-$(CONFIG_FSL_ENETC_VF) += fsl-enetc-vf.o + +ifeq ($(CONFIG_FSL_ENETC)$(CONFIG_FSL_ENETC_VF), yy) +fsl-enetc-vf-objs := enetc_vf.o +else +fsl-enetc-vf-$(CONFIG_FSL_ENETC_VF) += enetc.o enetc_cbdr.o \ + enetc_ethtool.o +fsl-enetc-vf-objs := enetc_vf.o $(fsl-enetc-vf-y) +endif diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c new file mode 100644 index 000000000000..5bb9eb35d76d --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -0,0 +1,1604 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* Copyright 2017-2019 NXP */ + +#include "enetc.h" +#include +#include +#include +#include + +/* ENETC overhead: optional extension BD + 1 BD gap */ +#define ENETC_TXBDS_NEEDED(val) ((val) + 2) +/* max # of chained Tx BDs is 15, including head and extension BD */ +#define ENETC_MAX_SKB_FRAGS 13 +#define ENETC_TXBDS_MAX_NEEDED ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1) + +static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb); + +netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_bdr *tx_ring; + int count; + + tx_ring = priv->tx_ring[skb->queue_mapping]; + + if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS)) + if (unlikely(skb_linearize(skb))) + goto drop_packet_err; + + count = skb_shinfo(skb)->nr_frags + 1; /* fragments + head */ + if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(count)) { + netif_stop_subqueue(ndev, tx_ring->index); + return NETDEV_TX_BUSY; + } + + count = enetc_map_tx_buffs(tx_ring, skb); + if (unlikely(!count)) + goto drop_packet_err; + + if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_MAX_NEEDED) + netif_stop_subqueue(ndev, tx_ring->index); + + return NETDEV_TX_OK; + +drop_packet_err: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static bool enetc_tx_csum(struct sk_buff *skb, union enetc_tx_bd *txbd) +{ + int l3_start, l3_hsize; + u16 l3_flags, l4_flags; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return false; + + switch (skb->csum_offset) { + case offsetof(struct tcphdr, check): + l4_flags = ENETC_TXBD_L4_TCP; + break; + case offsetof(struct udphdr, check): + l4_flags = ENETC_TXBD_L4_UDP; + break; + default: + skb_checksum_help(skb); + return false; + } + + l3_start = skb_network_offset(skb); + l3_hsize = skb_network_header_len(skb); + + l3_flags = 0; + if (skb->protocol == htons(ETH_P_IPV6)) + l3_flags = ENETC_TXBD_L3_IPV6; + + /* write BD fields */ + txbd->l3_csoff = enetc_txbd_l3_csoff(l3_start, l3_hsize, l3_flags); + txbd->l4_csoff = l4_flags; + + return true; +} + +static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring, + struct enetc_tx_swbd *tx_swbd) +{ + if (tx_swbd->is_dma_page) + dma_unmap_page(tx_ring->dev, tx_swbd->dma, + tx_swbd->len, DMA_TO_DEVICE); + else + dma_unmap_single(tx_ring->dev, tx_swbd->dma, + tx_swbd->len, DMA_TO_DEVICE); + tx_swbd->dma = 0; +} + +static void enetc_free_tx_skb(struct enetc_bdr *tx_ring, + struct enetc_tx_swbd *tx_swbd) +{ + if (tx_swbd->dma) + enetc_unmap_tx_buff(tx_ring, tx_swbd); + + if (tx_swbd->skb) { + dev_kfree_skb_any(tx_swbd->skb); + tx_swbd->skb = NULL; + } +} + +static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) +{ + struct enetc_tx_swbd *tx_swbd; + struct skb_frag_struct *frag; + int len = skb_headlen(skb); + union enetc_tx_bd temp_bd; + union enetc_tx_bd *txbd; + bool do_vlan, do_tstamp; + int i, count = 0; + unsigned int f; + dma_addr_t dma; + u8 flags = 0; + + i = tx_ring->next_to_use; + txbd = ENETC_TXBD(*tx_ring, i); + prefetchw(txbd); + + dma = dma_map_single(tx_ring->dev, skb->data, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx_ring->dev, dma))) + goto dma_err; + + temp_bd.addr = cpu_to_le64(dma); + temp_bd.buf_len = cpu_to_le16(len); + temp_bd.lstatus = 0; + + tx_swbd = &tx_ring->tx_swbd[i]; + tx_swbd->dma = dma; + tx_swbd->len = len; + tx_swbd->is_dma_page = 0; + count++; + + do_vlan = skb_vlan_tag_present(skb); + do_tstamp = skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP; + + if (do_vlan || do_tstamp) + flags |= ENETC_TXBD_FLAGS_EX; + + if (enetc_tx_csum(skb, &temp_bd)) + flags |= ENETC_TXBD_FLAGS_CSUM | ENETC_TXBD_FLAGS_L4CS; + + /* first BD needs frm_len and offload flags set */ + temp_bd.frm_len = cpu_to_le16(skb->len); + temp_bd.flags = flags; + + if (flags & ENETC_TXBD_FLAGS_EX) { + u8 e_flags = 0; + *txbd = temp_bd; + enetc_clear_tx_bd(&temp_bd); + + /* add extension BD for VLAN and/or timestamping */ + flags = 0; + tx_swbd++; + txbd++; + i++; + if (unlikely(i == tx_ring->bd_count)) { + i = 0; + tx_swbd = tx_ring->tx_swbd; + txbd = ENETC_TXBD(*tx_ring, 0); + } + prefetchw(txbd); + + if (do_vlan) { + temp_bd.ext.vid = cpu_to_le16(skb_vlan_tag_get(skb)); + temp_bd.ext.tpid = 0; /* < C-TAG */ + e_flags |= ENETC_TXBD_E_FLAGS_VLAN_INS; + } + + if (do_tstamp) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + e_flags |= ENETC_TXBD_E_FLAGS_TWO_STEP_PTP; + } + + temp_bd.ext.e_flags = e_flags; + count++; + } + + frag = &skb_shinfo(skb)->frags[0]; + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++, frag++) { + len = skb_frag_size(frag); + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len, + DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_err; + + *txbd = temp_bd; + enetc_clear_tx_bd(&temp_bd); + + flags = 0; + tx_swbd++; + txbd++; + i++; + if (unlikely(i == tx_ring->bd_count)) { + i = 0; + tx_swbd = tx_ring->tx_swbd; + txbd = ENETC_TXBD(*tx_ring, 0); + } + prefetchw(txbd); + + temp_bd.addr = cpu_to_le64(dma); + temp_bd.buf_len = cpu_to_le16(len); + + tx_swbd->dma = dma; + tx_swbd->len = len; + tx_swbd->is_dma_page = 1; + count++; + } + + /* last BD needs 'F' bit set */ + flags |= ENETC_TXBD_FLAGS_F; + temp_bd.flags = flags; + *txbd = temp_bd; + + tx_ring->tx_swbd[i].skb = skb; + + enetc_bdr_idx_inc(tx_ring, &i); + tx_ring->next_to_use = i; + + /* let H/W know BD ring has been updated */ + enetc_wr_reg(tx_ring->tpir, i); /* includes wmb() */ + + return count; + +dma_err: + dev_err(tx_ring->dev, "DMA map error"); + + do { + tx_swbd = &tx_ring->tx_swbd[i]; + enetc_free_tx_skb(tx_ring, tx_swbd); + if (i == 0) + i = tx_ring->bd_count; + i--; + } while (count--); + + return 0; +} + +static irqreturn_t enetc_msix(int irq, void *data) +{ + struct enetc_int_vector *v = data; + int i; + + /* disable interrupts */ + enetc_wr_reg(v->rbier, 0); + + for_each_set_bit(i, &v->tx_rings_map, v->count_tx_rings) + enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), 0); + + napi_schedule_irqoff(&v->napi); + + return IRQ_HANDLED; +} + +static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget); +static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, + struct napi_struct *napi, int work_limit); + +static int enetc_poll(struct napi_struct *napi, int budget) +{ + struct enetc_int_vector + *v = container_of(napi, struct enetc_int_vector, napi); + bool complete = true; + int work_done; + int i; + + for (i = 0; i < v->count_tx_rings; i++) + if (!enetc_clean_tx_ring(&v->tx_ring[i], budget)) + complete = false; + + work_done = enetc_clean_rx_ring(&v->rx_ring, napi, budget); + if (work_done == budget) + complete = false; + + if (!complete) + return budget; + + napi_complete_done(napi, work_done); + + /* enable interrupts */ + enetc_wr_reg(v->rbier, ENETC_RBIER_RXTIE); + + for_each_set_bit(i, &v->tx_rings_map, v->count_tx_rings) + enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), + ENETC_TBIER_TXTIE); + + return work_done; +} + +static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci) +{ + int pi = enetc_rd_reg(tx_ring->tcir) & ENETC_TBCIR_IDX_MASK; + + return pi >= ci ? pi - ci : tx_ring->bd_count - ci + pi; +} + +static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) +{ + struct net_device *ndev = tx_ring->ndev; + int tx_frm_cnt = 0, tx_byte_cnt = 0; + struct enetc_tx_swbd *tx_swbd; + int i, bds_to_clean; + + i = tx_ring->next_to_clean; + tx_swbd = &tx_ring->tx_swbd[i]; + bds_to_clean = enetc_bd_ready_count(tx_ring, i); + + while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) { + bool is_eof = !!tx_swbd->skb; + + enetc_unmap_tx_buff(tx_ring, tx_swbd); + if (is_eof) { + napi_consume_skb(tx_swbd->skb, napi_budget); + tx_swbd->skb = NULL; + } + + tx_byte_cnt += tx_swbd->len; + + bds_to_clean--; + tx_swbd++; + i++; + if (unlikely(i == tx_ring->bd_count)) { + i = 0; + tx_swbd = tx_ring->tx_swbd; + } + + /* BD iteration loop end */ + if (is_eof) { + tx_frm_cnt++; + /* re-arm interrupt source */ + enetc_wr_reg(tx_ring->idr, BIT(tx_ring->index) | + BIT(16 + tx_ring->index)); + } + + if (unlikely(!bds_to_clean)) + bds_to_clean = enetc_bd_ready_count(tx_ring, i); + } + + tx_ring->next_to_clean = i; + tx_ring->stats.packets += tx_frm_cnt; + tx_ring->stats.bytes += tx_byte_cnt; + + if (unlikely(tx_frm_cnt && netif_carrier_ok(ndev) && + __netif_subqueue_stopped(ndev, tx_ring->index) && + (enetc_bd_unused(tx_ring) >= ENETC_TXBDS_MAX_NEEDED))) { + netif_wake_subqueue(ndev, tx_ring->index); + } + + return tx_frm_cnt != ENETC_DEFAULT_TX_WORK; +} + +static bool enetc_new_page(struct enetc_bdr *rx_ring, + struct enetc_rx_swbd *rx_swbd) +{ + struct page *page; + dma_addr_t addr; + + page = dev_alloc_page(); + if (unlikely(!page)) + return false; + + addr = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rx_ring->dev, addr))) { + __free_page(page); + + return false; + } + + rx_swbd->dma = addr; + rx_swbd->page = page; + rx_swbd->page_offset = ENETC_RXB_PAD; + + return true; +} + +static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt) +{ + struct enetc_rx_swbd *rx_swbd; + union enetc_rx_bd *rxbd; + int i, j; + + i = rx_ring->next_to_use; + rx_swbd = &rx_ring->rx_swbd[i]; + rxbd = ENETC_RXBD(*rx_ring, i); + + for (j = 0; j < buff_cnt; j++) { + /* try reuse page */ + if (unlikely(!rx_swbd->page)) { + if (unlikely(!enetc_new_page(rx_ring, rx_swbd))) { + rx_ring->stats.rx_alloc_errs++; + break; + } + } + + /* update RxBD */ + rxbd->w.addr = cpu_to_le64(rx_swbd->dma + + rx_swbd->page_offset); + /* clear 'R" as well */ + rxbd->r.lstatus = 0; + + rx_swbd++; + rxbd++; + i++; + if (unlikely(i == rx_ring->bd_count)) { + i = 0; + rx_swbd = rx_ring->rx_swbd; + rxbd = ENETC_RXBD(*rx_ring, 0); + } + } + + if (likely(j)) { + rx_ring->next_to_alloc = i; /* keep track from page reuse */ + rx_ring->next_to_use = i; + /* update ENETC's consumer index */ + enetc_wr_reg(rx_ring->rcir, i); + } + + return j; +} + +static void enetc_get_offloads(struct enetc_bdr *rx_ring, + union enetc_rx_bd *rxbd, struct sk_buff *skb) +{ + /* TODO: add tstamp, hashing */ + if (rx_ring->ndev->features & NETIF_F_RXCSUM) { + u16 inet_csum = le16_to_cpu(rxbd->r.inet_csum); + + skb->csum = csum_unfold((__force __sum16)~htons(inet_csum)); + skb->ip_summed = CHECKSUM_COMPLETE; + } + + /* copy VLAN to skb, if one is extracted, for now we assume it's a + * standard TPID, but HW also supports custom values + */ + if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + le16_to_cpu(rxbd->r.vlan_opt)); +} + +static void enetc_process_skb(struct enetc_bdr *rx_ring, + struct sk_buff *skb) +{ + skb_record_rx_queue(skb, rx_ring->index); + skb->protocol = eth_type_trans(skb, rx_ring->ndev); +} + +static bool enetc_page_reusable(struct page *page) +{ + return (!page_is_pfmemalloc(page) && page_ref_count(page) == 1); +} + +static void enetc_reuse_page(struct enetc_bdr *rx_ring, + struct enetc_rx_swbd *old) +{ + struct enetc_rx_swbd *new; + + new = &rx_ring->rx_swbd[rx_ring->next_to_alloc]; + + /* next buf that may reuse a page */ + enetc_bdr_idx_inc(rx_ring, &rx_ring->next_to_alloc); + + /* copy page reference */ + *new = *old; +} + +static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring, + int i, u16 size) +{ + struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[i]; + + dma_sync_single_range_for_cpu(rx_ring->dev, rx_swbd->dma, + rx_swbd->page_offset, + size, DMA_FROM_DEVICE); + return rx_swbd; +} + +static void enetc_put_rx_buff(struct enetc_bdr *rx_ring, + struct enetc_rx_swbd *rx_swbd) +{ + if (likely(enetc_page_reusable(rx_swbd->page))) { + rx_swbd->page_offset ^= ENETC_RXB_TRUESIZE; + page_ref_inc(rx_swbd->page); + + enetc_reuse_page(rx_ring, rx_swbd); + + /* sync for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma, + rx_swbd->page_offset, + ENETC_RXB_DMA_SIZE, + DMA_FROM_DEVICE); + } else { + dma_unmap_page(rx_ring->dev, rx_swbd->dma, + PAGE_SIZE, DMA_FROM_DEVICE); + } + + rx_swbd->page = NULL; +} + +static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring, + int i, u16 size) +{ + struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size); + struct sk_buff *skb; + void *ba; + + ba = page_address(rx_swbd->page) + rx_swbd->page_offset; + skb = build_skb(ba - ENETC_RXB_PAD, ENETC_RXB_TRUESIZE); + if (unlikely(!skb)) { + rx_ring->stats.rx_alloc_errs++; + return NULL; + } + + skb_reserve(skb, ENETC_RXB_PAD); + __skb_put(skb, size); + + enetc_put_rx_buff(rx_ring, rx_swbd); + + return skb; +} + +static void enetc_add_rx_buff_to_skb(struct enetc_bdr *rx_ring, int i, + u16 size, struct sk_buff *skb) +{ + struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size); + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_swbd->page, + rx_swbd->page_offset, size, ENETC_RXB_TRUESIZE); + + enetc_put_rx_buff(rx_ring, rx_swbd); +} + +#define ENETC_RXBD_BUNDLE 16 /* # of BDs to update at once */ + +static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, + struct napi_struct *napi, int work_limit) +{ + int rx_frm_cnt = 0, rx_byte_cnt = 0; + int cleaned_cnt, i; + + cleaned_cnt = enetc_bd_unused(rx_ring); + /* next descriptor to process */ + i = rx_ring->next_to_clean; + + while (likely(rx_frm_cnt < work_limit)) { + union enetc_rx_bd *rxbd; + struct sk_buff *skb; + u32 bd_status; + u16 size; + + if (cleaned_cnt >= ENETC_RXBD_BUNDLE) { + int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt); + + cleaned_cnt -= count; + } + + rxbd = ENETC_RXBD(*rx_ring, i); + bd_status = le32_to_cpu(rxbd->r.lstatus); + if (!bd_status) + break; + + enetc_wr_reg(rx_ring->idr, BIT(rx_ring->index)); + dma_rmb(); /* for reading other rxbd fields */ + size = le16_to_cpu(rxbd->r.buf_len); + skb = enetc_map_rx_buff_to_skb(rx_ring, i, size); + if (!skb) + break; + + enetc_get_offloads(rx_ring, rxbd, skb); + + cleaned_cnt++; + rxbd++; + i++; + if (unlikely(i == rx_ring->bd_count)) { + i = 0; + rxbd = ENETC_RXBD(*rx_ring, 0); + } + + if (unlikely(bd_status & + ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) { + dev_kfree_skb(skb); + while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { + dma_rmb(); + bd_status = le32_to_cpu(rxbd->r.lstatus); + rxbd++; + i++; + if (unlikely(i == rx_ring->bd_count)) { + i = 0; + rxbd = ENETC_RXBD(*rx_ring, 0); + } + } + + rx_ring->ndev->stats.rx_dropped++; + rx_ring->ndev->stats.rx_errors++; + + break; + } + + /* not last BD in frame? */ + while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { + bd_status = le32_to_cpu(rxbd->r.lstatus); + size = ENETC_RXB_DMA_SIZE; + + if (bd_status & ENETC_RXBD_LSTATUS_F) { + dma_rmb(); + size = le16_to_cpu(rxbd->r.buf_len); + } + + enetc_add_rx_buff_to_skb(rx_ring, i, size, skb); + + cleaned_cnt++; + rxbd++; + i++; + if (unlikely(i == rx_ring->bd_count)) { + i = 0; + rxbd = ENETC_RXBD(*rx_ring, 0); + } + } + + rx_byte_cnt += skb->len; + + enetc_process_skb(rx_ring, skb); + + napi_gro_receive(napi, skb); + + rx_frm_cnt++; + } + + rx_ring->next_to_clean = i; + + rx_ring->stats.packets += rx_frm_cnt; + rx_ring->stats.bytes += rx_byte_cnt; + + return rx_frm_cnt; +} + +/* Probing and Init */ +#define ENETC_MAX_RFS_SIZE 64 +void enetc_get_si_caps(struct enetc_si *si) +{ + struct enetc_hw *hw = &si->hw; + u32 val; + + /* find out how many of various resources we have to work with */ + val = enetc_rd(hw, ENETC_SICAPR0); + si->num_rx_rings = (val >> 16) & 0xff; + si->num_tx_rings = val & 0xff; + + val = enetc_rd(hw, ENETC_SIRFSCAPR); + si->num_fs_entries = ENETC_SIRFSCAPR_GET_NUM_RFS(val); + si->num_fs_entries = min(si->num_fs_entries, ENETC_MAX_RFS_SIZE); + + si->num_rss = 0; + val = enetc_rd(hw, ENETC_SIPCAPR0); + if (val & ENETC_SIPCAPR0_RSS) { + val = enetc_rd(hw, ENETC_SIRSSCAPR); + si->num_rss = ENETC_SIRSSCAPR_GET_NUM_RSS(val); + } +} + +static int enetc_dma_alloc_bdr(struct enetc_bdr *r, size_t bd_size) +{ + r->bd_base = dma_alloc_coherent(r->dev, r->bd_count * bd_size, + &r->bd_dma_base, GFP_KERNEL); + if (!r->bd_base) + return -ENOMEM; + + /* h/w requires 128B alignment */ + if (!IS_ALIGNED(r->bd_dma_base, 128)) { + dma_free_coherent(r->dev, r->bd_count * bd_size, r->bd_base, + r->bd_dma_base); + return -EINVAL; + } + + return 0; +} + +static int enetc_alloc_txbdr(struct enetc_bdr *txr) +{ + int err; + + txr->tx_swbd = vzalloc(txr->bd_count * sizeof(struct enetc_tx_swbd)); + if (!txr->tx_swbd) + return -ENOMEM; + + err = enetc_dma_alloc_bdr(txr, sizeof(union enetc_tx_bd)); + if (err) { + vfree(txr->tx_swbd); + return err; + } + + txr->next_to_clean = 0; + txr->next_to_use = 0; + + return 0; +} + +static void enetc_free_txbdr(struct enetc_bdr *txr) +{ + int size, i; + + for (i = 0; i < txr->bd_count; i++) + enetc_free_tx_skb(txr, &txr->tx_swbd[i]); + + size = txr->bd_count * sizeof(union enetc_tx_bd); + + dma_free_coherent(txr->dev, size, txr->bd_base, txr->bd_dma_base); + txr->bd_base = NULL; + + vfree(txr->tx_swbd); + txr->tx_swbd = NULL; +} + +static int enetc_alloc_tx_resources(struct enetc_ndev_priv *priv) +{ + int i, err; + + for (i = 0; i < priv->num_tx_rings; i++) { + err = enetc_alloc_txbdr(priv->tx_ring[i]); + + if (err) + goto fail; + } + + return 0; + +fail: + while (i-- > 0) + enetc_free_txbdr(priv->tx_ring[i]); + + return err; +} + +static void enetc_free_tx_resources(struct enetc_ndev_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_tx_rings; i++) + enetc_free_txbdr(priv->tx_ring[i]); +} + +static int enetc_alloc_rxbdr(struct enetc_bdr *rxr) +{ + int err; + + rxr->rx_swbd = vzalloc(rxr->bd_count * sizeof(struct enetc_rx_swbd)); + if (!rxr->rx_swbd) + return -ENOMEM; + + err = enetc_dma_alloc_bdr(rxr, sizeof(union enetc_rx_bd)); + if (err) { + vfree(rxr->rx_swbd); + return err; + } + + rxr->next_to_clean = 0; + rxr->next_to_use = 0; + rxr->next_to_alloc = 0; + + return 0; +} + +static void enetc_free_rxbdr(struct enetc_bdr *rxr) +{ + int size; + + size = rxr->bd_count * sizeof(union enetc_rx_bd); + + dma_free_coherent(rxr->dev, size, rxr->bd_base, rxr->bd_dma_base); + rxr->bd_base = NULL; + + vfree(rxr->rx_swbd); + rxr->rx_swbd = NULL; +} + +static int enetc_alloc_rx_resources(struct enetc_ndev_priv *priv) +{ + int i, err; + + for (i = 0; i < priv->num_rx_rings; i++) { + err = enetc_alloc_rxbdr(priv->rx_ring[i]); + + if (err) + goto fail; + } + + return 0; + +fail: + while (i-- > 0) + enetc_free_rxbdr(priv->rx_ring[i]); + + return err; +} + +static void enetc_free_rx_resources(struct enetc_ndev_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_rx_rings; i++) + enetc_free_rxbdr(priv->rx_ring[i]); +} + +static void enetc_free_tx_ring(struct enetc_bdr *tx_ring) +{ + int i; + + if (!tx_ring->tx_swbd) + return; + + for (i = 0; i < tx_ring->bd_count; i++) { + struct enetc_tx_swbd *tx_swbd = &tx_ring->tx_swbd[i]; + + enetc_free_tx_skb(tx_ring, tx_swbd); + } + + tx_ring->next_to_clean = 0; + tx_ring->next_to_use = 0; +} + +static void enetc_free_rx_ring(struct enetc_bdr *rx_ring) +{ + int i; + + if (!rx_ring->rx_swbd) + return; + + for (i = 0; i < rx_ring->bd_count; i++) { + struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[i]; + + if (!rx_swbd->page) + continue; + + dma_unmap_page(rx_ring->dev, rx_swbd->dma, + PAGE_SIZE, DMA_FROM_DEVICE); + __free_page(rx_swbd->page); + rx_swbd->page = NULL; + } + + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; + rx_ring->next_to_alloc = 0; +} + +static void enetc_free_rxtx_rings(struct enetc_ndev_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_rx_rings; i++) + enetc_free_rx_ring(priv->rx_ring[i]); + + for (i = 0; i < priv->num_tx_rings; i++) + enetc_free_tx_ring(priv->tx_ring[i]); +} + +static int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr) +{ + int size = cbdr->bd_count * sizeof(struct enetc_cbd); + + cbdr->bd_base = dma_alloc_coherent(dev, size, &cbdr->bd_dma_base, + GFP_KERNEL); + if (!cbdr->bd_base) + return -ENOMEM; + + /* h/w requires 128B alignment */ + if (!IS_ALIGNED(cbdr->bd_dma_base, 128)) { + dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base); + return -EINVAL; + } + + cbdr->next_to_clean = 0; + cbdr->next_to_use = 0; + + return 0; +} + +static void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr) +{ + int size = cbdr->bd_count * sizeof(struct enetc_cbd); + + dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base); + cbdr->bd_base = NULL; +} + +static void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr) +{ + /* set CBDR cache attributes */ + enetc_wr(hw, ENETC_SICAR2, + ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT); + + enetc_wr(hw, ENETC_SICBDRBAR0, lower_32_bits(cbdr->bd_dma_base)); + enetc_wr(hw, ENETC_SICBDRBAR1, upper_32_bits(cbdr->bd_dma_base)); + enetc_wr(hw, ENETC_SICBDRLENR, ENETC_RTBLENR_LEN(cbdr->bd_count)); + + enetc_wr(hw, ENETC_SICBDRPIR, 0); + enetc_wr(hw, ENETC_SICBDRCIR, 0); + + /* enable ring */ + enetc_wr(hw, ENETC_SICBDRMR, BIT(31)); + + cbdr->pir = hw->reg + ENETC_SICBDRPIR; + cbdr->cir = hw->reg + ENETC_SICBDRCIR; +} + +static void enetc_clear_cbdr(struct enetc_hw *hw) +{ + enetc_wr(hw, ENETC_SICBDRMR, 0); +} + +static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups) +{ + int *rss_table; + int i; + + rss_table = kmalloc_array(si->num_rss, sizeof(*rss_table), GFP_KERNEL); + if (!rss_table) + return -ENOMEM; + + /* Set up RSS table defaults */ + for (i = 0; i < si->num_rss; i++) + rss_table[i] = i % num_groups; + + enetc_set_rss_table(si, rss_table, si->num_rss); + + kfree(rss_table); + + return 0; +} + +static int enetc_configure_si(struct enetc_ndev_priv *priv) +{ + struct enetc_si *si = priv->si; + struct enetc_hw *hw = &si->hw; + int err; + + enetc_setup_cbdr(hw, &si->cbd_ring); + /* set SI cache attributes */ + enetc_wr(hw, ENETC_SICAR0, + ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT); + enetc_wr(hw, ENETC_SICAR1, ENETC_SICAR_MSI); + /* enable SI */ + enetc_wr(hw, ENETC_SIMR, ENETC_SIMR_EN); + + if (si->num_rss) { + err = enetc_setup_default_rss_table(si, priv->num_rx_rings); + if (err) + return err; + } + + return 0; +} + +void enetc_init_si_rings_params(struct enetc_ndev_priv *priv) +{ + struct enetc_si *si = priv->si; + int cpus = num_online_cpus(); + + priv->tx_bd_count = ENETC_BDR_DEFAULT_SIZE; + priv->rx_bd_count = ENETC_BDR_DEFAULT_SIZE; + + /* Enable all available TX rings in order to configure as many + * priorities as possible, when needed. + * TODO: Make # of TX rings run-time configurable + */ + priv->num_rx_rings = min_t(int, cpus, si->num_rx_rings); + priv->num_tx_rings = si->num_tx_rings; + priv->bdr_int_num = cpus; + + /* SI specific */ + si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE; +} + +int enetc_alloc_si_resources(struct enetc_ndev_priv *priv) +{ + struct enetc_si *si = priv->si; + int err; + + err = enetc_alloc_cbdr(priv->dev, &si->cbd_ring); + if (err) + return err; + + priv->cls_rules = kcalloc(si->num_fs_entries, sizeof(*priv->cls_rules), + GFP_KERNEL); + if (!priv->cls_rules) { + err = -ENOMEM; + goto err_alloc_cls; + } + + err = enetc_configure_si(priv); + if (err) + goto err_config_si; + + return 0; + +err_config_si: + kfree(priv->cls_rules); +err_alloc_cls: + enetc_clear_cbdr(&si->hw); + enetc_free_cbdr(priv->dev, &si->cbd_ring); + + return err; +} + +void enetc_free_si_resources(struct enetc_ndev_priv *priv) +{ + struct enetc_si *si = priv->si; + + enetc_clear_cbdr(&si->hw); + enetc_free_cbdr(priv->dev, &si->cbd_ring); + + kfree(priv->cls_rules); +} + +static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) +{ + int idx = tx_ring->index; + u32 tbmr; + + enetc_txbdr_wr(hw, idx, ENETC_TBBAR0, + lower_32_bits(tx_ring->bd_dma_base)); + + enetc_txbdr_wr(hw, idx, ENETC_TBBAR1, + upper_32_bits(tx_ring->bd_dma_base)); + + WARN_ON(!IS_ALIGNED(tx_ring->bd_count, 64)); /* multiple of 64 */ + enetc_txbdr_wr(hw, idx, ENETC_TBLENR, + ENETC_RTBLENR_LEN(tx_ring->bd_count)); + + /* clearing PI/CI registers for Tx not supported, adjust sw indexes */ + tx_ring->next_to_use = enetc_txbdr_rd(hw, idx, ENETC_TBPIR); + tx_ring->next_to_clean = enetc_txbdr_rd(hw, idx, ENETC_TBCIR); + + /* enable Tx ints by setting pkt thr to 1 */ + enetc_txbdr_wr(hw, idx, ENETC_TBICIR0, ENETC_TBICIR0_ICEN | 0x1); + + tbmr = ENETC_TBMR_EN; + if (tx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_TX) + tbmr |= ENETC_TBMR_VIH; + + /* enable ring */ + enetc_txbdr_wr(hw, idx, ENETC_TBMR, tbmr); + + tx_ring->tpir = hw->reg + ENETC_BDR(TX, idx, ENETC_TBPIR); + tx_ring->tcir = hw->reg + ENETC_BDR(TX, idx, ENETC_TBCIR); + tx_ring->idr = hw->reg + ENETC_SITXIDR; +} + +static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) +{ + int idx = rx_ring->index; + u32 rbmr; + + enetc_rxbdr_wr(hw, idx, ENETC_RBBAR0, + lower_32_bits(rx_ring->bd_dma_base)); + + enetc_rxbdr_wr(hw, idx, ENETC_RBBAR1, + upper_32_bits(rx_ring->bd_dma_base)); + + WARN_ON(!IS_ALIGNED(rx_ring->bd_count, 64)); /* multiple of 64 */ + enetc_rxbdr_wr(hw, idx, ENETC_RBLENR, + ENETC_RTBLENR_LEN(rx_ring->bd_count)); + + enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE); + + enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0); + + /* enable Rx ints by setting pkt thr to 1 */ + enetc_rxbdr_wr(hw, idx, ENETC_RBICIR0, ENETC_RBICIR0_ICEN | 0x1); + + rbmr = ENETC_RBMR_EN; + if (rx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_RX) + rbmr |= ENETC_RBMR_VTE; + + rx_ring->rcir = hw->reg + ENETC_BDR(RX, idx, ENETC_RBCIR); + rx_ring->idr = hw->reg + ENETC_SIRXIDR; + + enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring)); + + /* enable ring */ + enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr); +} + +static void enetc_setup_bdrs(struct enetc_ndev_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_tx_rings; i++) + enetc_setup_txbdr(&priv->si->hw, priv->tx_ring[i]); + + for (i = 0; i < priv->num_rx_rings; i++) + enetc_setup_rxbdr(&priv->si->hw, priv->rx_ring[i]); +} + +static void enetc_clear_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) +{ + int idx = rx_ring->index; + + /* disable EN bit on ring */ + enetc_rxbdr_wr(hw, idx, ENETC_RBMR, 0); +} + +static void enetc_clear_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) +{ + int delay = 8, timeout = 100; + int idx = tx_ring->index; + + /* disable EN bit on ring */ + enetc_txbdr_wr(hw, idx, ENETC_TBMR, 0); + + /* wait for busy to clear */ + while (delay < timeout && + enetc_txbdr_rd(hw, idx, ENETC_TBSR) & ENETC_TBSR_BUSY) { + msleep(delay); + delay *= 2; + } + + if (delay >= timeout) + netdev_warn(tx_ring->ndev, "timeout for tx ring #%d clear\n", + idx); +} + +static void enetc_clear_bdrs(struct enetc_ndev_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_tx_rings; i++) + enetc_clear_txbdr(&priv->si->hw, priv->tx_ring[i]); + + for (i = 0; i < priv->num_rx_rings; i++) + enetc_clear_rxbdr(&priv->si->hw, priv->rx_ring[i]); + + udelay(1); +} + +static int enetc_setup_irqs(struct enetc_ndev_priv *priv) +{ + struct pci_dev *pdev = priv->si->pdev; + cpumask_t cpu_mask; + int i, j, err; + + for (i = 0; i < priv->bdr_int_num; i++) { + int irq = pci_irq_vector(pdev, ENETC_BDR_INT_BASE_IDX + i); + struct enetc_int_vector *v = priv->int_vector[i]; + int entry = ENETC_BDR_INT_BASE_IDX + i; + struct enetc_hw *hw = &priv->si->hw; + + snprintf(v->name, sizeof(v->name), "%s-rxtx%d", + priv->ndev->name, i); + err = request_irq(irq, enetc_msix, 0, v->name, v); + if (err) { + dev_err(priv->dev, "request_irq() failed!\n"); + goto irq_err; + } + + v->tbier_base = hw->reg + ENETC_BDR(TX, 0, ENETC_TBIER); + v->rbier = hw->reg + ENETC_BDR(RX, i, ENETC_RBIER); + + enetc_wr(hw, ENETC_SIMSIRRV(i), entry); + + for (j = 0; j < v->count_tx_rings; j++) { + int idx = v->tx_ring[j].index; + + enetc_wr(hw, ENETC_SIMSITRV(idx), entry); + } + cpumask_clear(&cpu_mask); + cpumask_set_cpu(i % num_online_cpus(), &cpu_mask); + irq_set_affinity_hint(irq, &cpu_mask); + } + + return 0; + +irq_err: + while (i--) { + int irq = pci_irq_vector(pdev, ENETC_BDR_INT_BASE_IDX + i); + + irq_set_affinity_hint(irq, NULL); + free_irq(irq, priv->int_vector[i]); + } + + return err; +} + +static void enetc_free_irqs(struct enetc_ndev_priv *priv) +{ + struct pci_dev *pdev = priv->si->pdev; + int i; + + for (i = 0; i < priv->bdr_int_num; i++) { + int irq = pci_irq_vector(pdev, ENETC_BDR_INT_BASE_IDX + i); + + irq_set_affinity_hint(irq, NULL); + free_irq(irq, priv->int_vector[i]); + } +} + +static void enetc_enable_interrupts(struct enetc_ndev_priv *priv) +{ + int i; + + /* enable Tx & Rx event indication */ + for (i = 0; i < priv->num_rx_rings; i++) { + enetc_rxbdr_wr(&priv->si->hw, i, + ENETC_RBIER, ENETC_RBIER_RXTIE); + } + + for (i = 0; i < priv->num_tx_rings; i++) { + enetc_txbdr_wr(&priv->si->hw, i, + ENETC_TBIER, ENETC_TBIER_TXTIE); + } +} + +static void enetc_disable_interrupts(struct enetc_ndev_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_tx_rings; i++) + enetc_txbdr_wr(&priv->si->hw, i, ENETC_TBIER, 0); + + for (i = 0; i < priv->num_rx_rings; i++) + enetc_rxbdr_wr(&priv->si->hw, i, ENETC_RBIER, 0); +} + +static void adjust_link(struct net_device *ndev) +{ + struct phy_device *phydev = ndev->phydev; + + phy_print_status(phydev); +} + +static int enetc_phy_connect(struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct phy_device *phydev; + + if (!priv->phy_node) + return 0; /* phy-less mode */ + + phydev = of_phy_connect(ndev, priv->phy_node, &adjust_link, + 0, priv->if_mode); + if (!phydev) { + dev_err(&ndev->dev, "could not attach to PHY\n"); + return -ENODEV; + } + + phy_attached_info(phydev); + + return 0; +} + +int enetc_open(struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + int i, err; + + err = enetc_setup_irqs(priv); + if (err) + return err; + + err = enetc_phy_connect(ndev); + if (err) + goto err_phy_connect; + + err = enetc_alloc_tx_resources(priv); + if (err) + goto err_alloc_tx; + + err = enetc_alloc_rx_resources(priv); + if (err) + goto err_alloc_rx; + + enetc_setup_bdrs(priv); + + err = netif_set_real_num_tx_queues(ndev, priv->num_tx_rings); + if (err) + goto err_set_queues; + + err = netif_set_real_num_rx_queues(ndev, priv->num_rx_rings); + if (err) + goto err_set_queues; + + for (i = 0; i < priv->bdr_int_num; i++) + napi_enable(&priv->int_vector[i]->napi); + + enetc_enable_interrupts(priv); + + if (ndev->phydev) + phy_start(ndev->phydev); + else + netif_carrier_on(ndev); + + netif_tx_start_all_queues(ndev); + + return 0; + +err_set_queues: + enetc_free_rx_resources(priv); +err_alloc_rx: + enetc_free_tx_resources(priv); +err_alloc_tx: + if (ndev->phydev) + phy_disconnect(ndev->phydev); +err_phy_connect: + enetc_free_irqs(priv); + + return err; +} + +int enetc_close(struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + int i; + + netif_tx_stop_all_queues(ndev); + + if (ndev->phydev) { + phy_stop(ndev->phydev); + phy_disconnect(ndev->phydev); + } else { + netif_carrier_off(ndev); + } + + for (i = 0; i < priv->bdr_int_num; i++) { + napi_synchronize(&priv->int_vector[i]->napi); + napi_disable(&priv->int_vector[i]->napi); + } + + enetc_disable_interrupts(priv); + enetc_clear_bdrs(priv); + + enetc_free_rxtx_rings(priv); + enetc_free_rx_resources(priv); + enetc_free_tx_resources(priv); + enetc_free_irqs(priv); + + return 0; +} + +struct net_device_stats *enetc_get_stats(struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct net_device_stats *stats = &ndev->stats; + unsigned long packets = 0, bytes = 0; + int i; + + for (i = 0; i < priv->num_rx_rings; i++) { + packets += priv->rx_ring[i]->stats.packets; + bytes += priv->rx_ring[i]->stats.bytes; + } + + stats->rx_packets = packets; + stats->rx_bytes = bytes; + bytes = 0; + packets = 0; + + for (i = 0; i < priv->num_tx_rings; i++) { + packets += priv->tx_ring[i]->stats.packets; + bytes += priv->tx_ring[i]->stats.bytes; + } + + stats->tx_packets = packets; + stats->tx_bytes = bytes; + + return stats; +} + +static int enetc_set_rss(struct net_device *ndev, int en) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + u32 reg; + + enetc_wr(hw, ENETC_SIRBGCR, priv->num_rx_rings); + + reg = enetc_rd(hw, ENETC_SIMR); + reg &= ~ENETC_SIMR_RSSE; + reg |= (en) ? ENETC_SIMR_RSSE : 0; + enetc_wr(hw, ENETC_SIMR, reg); + + return 0; +} + +int enetc_set_features(struct net_device *ndev, + netdev_features_t features) +{ + netdev_features_t changed = ndev->features ^ features; + + if (changed & NETIF_F_RXHASH) + enetc_set_rss(ndev, !!(features & NETIF_F_RXHASH)); + + return 0; +} + +int enetc_alloc_msix(struct enetc_ndev_priv *priv) +{ + struct pci_dev *pdev = priv->si->pdev; + int size, v_tx_rings; + int i, n, err, nvec; + + nvec = ENETC_BDR_INT_BASE_IDX + priv->bdr_int_num; + /* allocate MSIX for both messaging and Rx/Tx interrupts */ + n = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX); + + if (n < 0) + return n; + + if (n != nvec) + return -EPERM; + + /* # of tx rings per int vector */ + v_tx_rings = priv->num_tx_rings / priv->bdr_int_num; + size = sizeof(struct enetc_int_vector) + + sizeof(struct enetc_bdr) * v_tx_rings; + + for (i = 0; i < priv->bdr_int_num; i++) { + struct enetc_int_vector *v; + struct enetc_bdr *bdr; + int j; + + v = kzalloc(size, GFP_KERNEL); + if (!v) { + err = -ENOMEM; + goto fail; + } + + priv->int_vector[i] = v; + + netif_napi_add(priv->ndev, &v->napi, enetc_poll, + NAPI_POLL_WEIGHT); + v->count_tx_rings = v_tx_rings; + + for (j = 0; j < v_tx_rings; j++) { + int idx; + + /* default tx ring mapping policy */ + if (priv->bdr_int_num == ENETC_MAX_BDR_INT) + idx = 2 * j + i; /* 2 CPUs */ + else + idx = j + i * v_tx_rings; /* default */ + + __set_bit(idx, &v->tx_rings_map); + bdr = &v->tx_ring[j]; + bdr->index = idx; + bdr->ndev = priv->ndev; + bdr->dev = priv->dev; + bdr->bd_count = priv->tx_bd_count; + priv->tx_ring[idx] = bdr; + } + + bdr = &v->rx_ring; + bdr->index = i; + bdr->ndev = priv->ndev; + bdr->dev = priv->dev; + bdr->bd_count = priv->rx_bd_count; + priv->rx_ring[i] = bdr; + } + + return 0; + +fail: + while (i--) { + netif_napi_del(&priv->int_vector[i]->napi); + kfree(priv->int_vector[i]); + } + + pci_free_irq_vectors(pdev); + + return err; +} + +void enetc_free_msix(struct enetc_ndev_priv *priv) +{ + int i; + + for (i = 0; i < priv->bdr_int_num; i++) { + struct enetc_int_vector *v = priv->int_vector[i]; + + netif_napi_del(&v->napi); + } + + for (i = 0; i < priv->num_rx_rings; i++) + priv->rx_ring[i] = NULL; + + for (i = 0; i < priv->num_tx_rings; i++) + priv->tx_ring[i] = NULL; + + for (i = 0; i < priv->bdr_int_num; i++) { + kfree(priv->int_vector[i]); + priv->int_vector[i] = NULL; + } + + /* disable all MSIX for this device */ + pci_free_irq_vectors(priv->si->pdev); +} + +static void enetc_kfree_si(struct enetc_si *si) +{ + char *p = (char *)si - si->pad; + + kfree(p); +} + +static void enetc_detect_errata(struct enetc_si *si) +{ + if (si->pdev->revision == ENETC_REV1) + si->errata = ENETC_ERR_TXCSUM | ENETC_ERR_VLAN_ISOL | + ENETC_ERR_UCMCSWP; +} + +int enetc_pci_probe(struct pci_dev *pdev, const char *name, int sizeof_priv) +{ + struct enetc_si *si, *p; + struct enetc_hw *hw; + size_t alloc_size; + int err, len; + + pcie_flr(pdev); + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(&pdev->dev, "device enable failed\n"); + return err; + } + + /* set up for high or low dma */ + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, + "DMA configuration failed: 0x%x\n", err); + goto err_dma; + } + } + + err = pci_request_mem_regions(pdev, name); + if (err) { + dev_err(&pdev->dev, "pci_request_regions failed err=%d\n", err); + goto err_pci_mem_reg; + } + + pci_set_master(pdev); + + alloc_size = sizeof(struct enetc_si); + if (sizeof_priv) { + /* align priv to 32B */ + alloc_size = ALIGN(alloc_size, ENETC_SI_ALIGN); + alloc_size += sizeof_priv; + } + /* force 32B alignment for enetc_si */ + alloc_size += ENETC_SI_ALIGN - 1; + + p = kzalloc(alloc_size, GFP_KERNEL); + if (!p) { + err = -ENOMEM; + goto err_alloc_si; + } + + si = PTR_ALIGN(p, ENETC_SI_ALIGN); + si->pad = (char *)si - (char *)p; + + pci_set_drvdata(pdev, si); + si->pdev = pdev; + hw = &si->hw; + + len = pci_resource_len(pdev, ENETC_BAR_REGS); + hw->reg = ioremap(pci_resource_start(pdev, ENETC_BAR_REGS), len); + if (!hw->reg) { + err = -ENXIO; + dev_err(&pdev->dev, "ioremap() failed\n"); + goto err_ioremap; + } + if (len > ENETC_PORT_BASE) + hw->port = hw->reg + ENETC_PORT_BASE; + if (len > ENETC_GLOBAL_BASE) + hw->global = hw->reg + ENETC_GLOBAL_BASE; + + enetc_detect_errata(si); + + return 0; + +err_ioremap: + enetc_kfree_si(si); +err_alloc_si: + pci_release_mem_regions(pdev); +err_pci_mem_reg: +err_dma: + pci_disable_device(pdev); + + return err; +} + +void enetc_pci_remove(struct pci_dev *pdev) +{ + struct enetc_si *si = pci_get_drvdata(pdev); + struct enetc_hw *hw = &si->hw; + + iounmap(hw->reg); + enetc_kfree_si(si); + pci_release_mem_regions(pdev); + pci_disable_device(pdev); +} diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h new file mode 100644 index 000000000000..b274135c5103 --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2017-2019 NXP */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "enetc_hw.h" + +#define ENETC_MAC_MAXFRM_SIZE 9600 +#define ENETC_MAX_MTU (ENETC_MAC_MAXFRM_SIZE - \ + (ETH_FCS_LEN + ETH_HLEN + VLAN_HLEN)) + +struct enetc_tx_swbd { + struct sk_buff *skb; + dma_addr_t dma; + u16 len; + u16 is_dma_page; +}; + +#define ENETC_RX_MAXFRM_SIZE ENETC_MAC_MAXFRM_SIZE +#define ENETC_RXB_TRUESIZE 2048 /* PAGE_SIZE >> 1 */ +#define ENETC_RXB_PAD NET_SKB_PAD /* add extra space if needed */ +#define ENETC_RXB_DMA_SIZE \ + (SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD) + +struct enetc_rx_swbd { + dma_addr_t dma; + struct page *page; + u16 page_offset; +}; + +struct enetc_ring_stats { + unsigned int packets; + unsigned int bytes; + unsigned int rx_alloc_errs; +}; + +#define ENETC_BDR_DEFAULT_SIZE 1024 +#define ENETC_DEFAULT_TX_WORK 256 + +struct enetc_bdr { + struct device *dev; /* for DMA mapping */ + struct net_device *ndev; + void *bd_base; /* points to Rx or Tx BD ring */ + union { + void __iomem *tpir; + void __iomem *rcir; + }; + u16 index; + int bd_count; /* # of BDs */ + int next_to_use; + int next_to_clean; + union { + struct enetc_tx_swbd *tx_swbd; + struct enetc_rx_swbd *rx_swbd; + }; + union { + void __iomem *tcir; /* Tx */ + int next_to_alloc; /* Rx */ + }; + void __iomem *idr; /* Interrupt Detect Register pointer */ + + struct enetc_ring_stats stats; + + dma_addr_t bd_dma_base; +} ____cacheline_aligned_in_smp; + +static inline void enetc_bdr_idx_inc(struct enetc_bdr *bdr, int *i) +{ + if (unlikely(++*i == bdr->bd_count)) + *i = 0; +} + +static inline int enetc_bd_unused(struct enetc_bdr *bdr) +{ + if (bdr->next_to_clean > bdr->next_to_use) + return bdr->next_to_clean - bdr->next_to_use - 1; + + return bdr->bd_count + bdr->next_to_clean - bdr->next_to_use - 1; +} + +/* Control BD ring */ +#define ENETC_CBDR_DEFAULT_SIZE 64 +struct enetc_cbdr { + void *bd_base; /* points to Rx or Tx BD ring */ + void __iomem *pir; + void __iomem *cir; + + int bd_count; /* # of BDs */ + int next_to_use; + int next_to_clean; + + dma_addr_t bd_dma_base; +}; + +#define ENETC_TXBD(BDR, i) (&(((union enetc_tx_bd *)((BDR).bd_base))[i])) +#define ENETC_RXBD(BDR, i) (&(((union enetc_rx_bd *)((BDR).bd_base))[i])) + +struct enetc_msg_swbd { + void *vaddr; + dma_addr_t dma; + int size; +}; + +#define ENETC_REV1 0x1 +enum enetc_errata { + ENETC_ERR_TXCSUM = BIT(0), + ENETC_ERR_VLAN_ISOL = BIT(1), + ENETC_ERR_UCMCSWP = BIT(2), +}; + +/* PCI IEP device data */ +struct enetc_si { + struct pci_dev *pdev; + struct enetc_hw hw; + enum enetc_errata errata; + + struct net_device *ndev; /* back ref. */ + + struct enetc_cbdr cbd_ring; + + int num_rx_rings; /* how many rings are available in the SI */ + int num_tx_rings; + int num_fs_entries; + int num_rss; /* number of RSS buckets */ + unsigned short pad; +}; + +#define ENETC_SI_ALIGN 32 + +static inline void *enetc_si_priv(const struct enetc_si *si) +{ + return (char *)si + ALIGN(sizeof(struct enetc_si), ENETC_SI_ALIGN); +} + +static inline bool enetc_si_is_pf(struct enetc_si *si) +{ + return !!(si->hw.port); +} + +#define ENETC_MAX_NUM_TXQS 8 +#define ENETC_INT_NAME_MAX (IFNAMSIZ + 8) + +struct enetc_int_vector { + void __iomem *rbier; + void __iomem *tbier_base; + unsigned long tx_rings_map; + int count_tx_rings; + struct napi_struct napi; + char name[ENETC_INT_NAME_MAX]; + + struct enetc_bdr rx_ring ____cacheline_aligned_in_smp; + struct enetc_bdr tx_ring[0]; +}; + +struct enetc_cls_rule { + struct ethtool_rx_flow_spec fs; + int used; +}; + +#define ENETC_MAX_BDR_INT 2 /* fixed to max # of available cpus */ + +struct enetc_ndev_priv { + struct net_device *ndev; + struct device *dev; /* dma-mapping device */ + struct enetc_si *si; + + int bdr_int_num; /* number of Rx/Tx ring interrupts */ + struct enetc_int_vector *int_vector[ENETC_MAX_BDR_INT]; + u16 num_rx_rings, num_tx_rings; + u16 rx_bd_count, tx_bd_count; + + u16 msg_enable; + + struct enetc_bdr *tx_ring[16]; + struct enetc_bdr *rx_ring[16]; + + struct enetc_cls_rule *cls_rules; + + struct device_node *phy_node; + phy_interface_t if_mode; +}; + +/* Messaging */ + +/* VF-PF set primary MAC address message format */ +struct enetc_msg_cmd_set_primary_mac { + struct enetc_msg_cmd_header header; + struct sockaddr mac; +}; + +#define ENETC_CBD(R, i) (&(((struct enetc_cbd *)((R).bd_base))[i])) + +#define ENETC_CBDR_TIMEOUT 1000 /* usecs */ + +/* SI common */ +int enetc_pci_probe(struct pci_dev *pdev, const char *name, int sizeof_priv); +void enetc_pci_remove(struct pci_dev *pdev); +int enetc_alloc_msix(struct enetc_ndev_priv *priv); +void enetc_free_msix(struct enetc_ndev_priv *priv); +void enetc_get_si_caps(struct enetc_si *si); +void enetc_init_si_rings_params(struct enetc_ndev_priv *priv); +int enetc_alloc_si_resources(struct enetc_ndev_priv *priv); +void enetc_free_si_resources(struct enetc_ndev_priv *priv); + +int enetc_open(struct net_device *ndev); +int enetc_close(struct net_device *ndev); +netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev); +struct net_device_stats *enetc_get_stats(struct net_device *ndev); +int enetc_set_features(struct net_device *ndev, + netdev_features_t features); +/* ethtool */ +void enetc_set_ethtool_ops(struct net_device *ndev); + +/* control buffer descriptor ring (CBDR) */ +int enetc_set_mac_flt_entry(struct enetc_si *si, int index, + char *mac_addr, int si_map); +int enetc_clear_mac_flt_entry(struct enetc_si *si, int index); +int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse, + int index); +void enetc_set_rss_key(struct enetc_hw *hw, const u8 *bytes); +int enetc_get_rss_table(struct enetc_si *si, u32 *table, int count); +int enetc_set_rss_table(struct enetc_si *si, const u32 *table, int count); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c new file mode 100644 index 000000000000..de466b71bf8f --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* Copyright 2017-2019 NXP */ + +#include "enetc.h" + +static void enetc_clean_cbdr(struct enetc_si *si) +{ + struct enetc_cbdr *ring = &si->cbd_ring; + struct enetc_cbd *dest_cbd; + int i, status; + + i = ring->next_to_clean; + + while (enetc_rd_reg(ring->cir) != i) { + dest_cbd = ENETC_CBD(*ring, i); + status = dest_cbd->status_flags & ENETC_CBD_STATUS_MASK; + if (status) + dev_warn(&si->pdev->dev, "CMD err %04x for cmd %04x\n", + status, dest_cbd->cmd); + + memset(dest_cbd, 0, sizeof(*dest_cbd)); + + i = (i + 1) % ring->bd_count; + } + + ring->next_to_clean = i; +} + +static int enetc_cbd_unused(struct enetc_cbdr *r) +{ + return (r->next_to_clean - r->next_to_use - 1 + r->bd_count) % + r->bd_count; +} + +static int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd) +{ + struct enetc_cbdr *ring = &si->cbd_ring; + int timeout = ENETC_CBDR_TIMEOUT; + struct enetc_cbd *dest_cbd; + int i; + + if (unlikely(!ring->bd_base)) + return -EIO; + + if (unlikely(!enetc_cbd_unused(ring))) + enetc_clean_cbdr(si); + + i = ring->next_to_use; + dest_cbd = ENETC_CBD(*ring, i); + + /* copy command to the ring */ + *dest_cbd = *cbd; + i = (i + 1) % ring->bd_count; + + ring->next_to_use = i; + /* let H/W know BD ring has been updated */ + enetc_wr_reg(ring->pir, i); + + do { + if (enetc_rd_reg(ring->cir) == i) + break; + udelay(10); /* cannot sleep, rtnl_lock() */ + timeout -= 10; + } while (timeout); + + if (!timeout) + return -EBUSY; + + enetc_clean_cbdr(si); + + return 0; +} + +int enetc_clear_mac_flt_entry(struct enetc_si *si, int index) +{ + struct enetc_cbd cbd; + + memset(&cbd, 0, sizeof(cbd)); + + cbd.cls = 1; + cbd.status_flags = ENETC_CBD_FLAGS_SF; + cbd.index = cpu_to_le16(index); + + return enetc_send_cmd(si, &cbd); +} + +int enetc_set_mac_flt_entry(struct enetc_si *si, int index, + char *mac_addr, int si_map) +{ + struct enetc_cbd cbd; + u32 upper; + u16 lower; + + memset(&cbd, 0, sizeof(cbd)); + + /* fill up the "set" descriptor */ + cbd.cls = 1; + cbd.status_flags = ENETC_CBD_FLAGS_SF; + cbd.index = cpu_to_le16(index); + cbd.opt[3] = cpu_to_le32(si_map); + /* enable entry */ + cbd.opt[0] = cpu_to_le32(BIT(31)); + + upper = *(const u32 *)mac_addr; + lower = *(const u16 *)(mac_addr + 4); + cbd.addr[0] = cpu_to_le32(upper); + cbd.addr[1] = cpu_to_le32(lower); + + return enetc_send_cmd(si, &cbd); +} + +#define RFSE_ALIGN 64 +/* Set entry in RFS table */ +int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse, + int index) +{ + struct enetc_cbd cbd = {.cmd = 0}; + dma_addr_t dma, dma_align; + void *tmp, *tmp_align; + int err; + + /* fill up the "set" descriptor */ + cbd.cmd = 0; + cbd.cls = 4; + cbd.index = cpu_to_le16(index); + cbd.length = cpu_to_le16(sizeof(*rfse)); + cbd.opt[3] = cpu_to_le32(0); /* SI */ + + tmp = dma_alloc_coherent(&si->pdev->dev, sizeof(*rfse) + RFSE_ALIGN, + &dma, GFP_KERNEL); + if (!tmp) { + dev_err(&si->pdev->dev, "DMA mapping of RFS entry failed!\n"); + return -ENOMEM; + } + + dma_align = ALIGN(dma, RFSE_ALIGN); + tmp_align = PTR_ALIGN(tmp, RFSE_ALIGN); + memcpy(tmp_align, rfse, sizeof(*rfse)); + + cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); + + err = enetc_send_cmd(si, &cbd); + if (err) + dev_err(&si->pdev->dev, "FS entry add failed (%d)!", err); + + dma_free_coherent(&si->pdev->dev, sizeof(*rfse) + RFSE_ALIGN, + tmp, dma); + + return err; +} + +#define RSSE_ALIGN 64 +static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, + bool read) +{ + struct enetc_cbd cbd = {.cmd = 0}; + dma_addr_t dma, dma_align; + u8 *tmp, *tmp_align; + int err, i; + + if (count < RSSE_ALIGN) + /* HW only takes in a full 64 entry table */ + return -EINVAL; + + tmp = dma_alloc_coherent(&si->pdev->dev, count + RSSE_ALIGN, + &dma, GFP_KERNEL); + if (!tmp) { + dev_err(&si->pdev->dev, "DMA mapping of RSS table failed!\n"); + return -ENOMEM; + } + dma_align = ALIGN(dma, RSSE_ALIGN); + tmp_align = PTR_ALIGN(tmp, RSSE_ALIGN); + + if (!read) + for (i = 0; i < count; i++) + tmp_align[i] = (u8)(table[i]); + + /* fill up the descriptor */ + cbd.cmd = read ? 2 : 1; + cbd.cls = 3; + cbd.length = cpu_to_le16(count); + + cbd.addr[0] = cpu_to_le32(lower_32_bits(dma_align)); + cbd.addr[1] = cpu_to_le32(upper_32_bits(dma_align)); + + err = enetc_send_cmd(si, &cbd); + if (err) + dev_err(&si->pdev->dev, "RSS cmd failed (%d)!", err); + + if (read) + for (i = 0; i < count; i++) + table[i] = tmp_align[i]; + + dma_free_coherent(&si->pdev->dev, count + RSSE_ALIGN, tmp, dma); + + return err; +} + +/* Get RSS table */ +int enetc_get_rss_table(struct enetc_si *si, u32 *table, int count) +{ + return enetc_cmd_rss_table(si, table, count, true); +} + +/* Set RSS table */ +int enetc_set_rss_table(struct enetc_si *si, const u32 *table, int count) +{ + return enetc_cmd_rss_table(si, (u32 *)table, count, false); +} diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c new file mode 100644 index 000000000000..1ecad9ffabae --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -0,0 +1,597 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* Copyright 2017-2019 NXP */ + +#include +#include +#include "enetc.h" + +static const u32 enetc_si_regs[] = { + ENETC_SIMR, ENETC_SIPMAR0, ENETC_SIPMAR1, ENETC_SICBDRMR, + ENETC_SICBDRSR, ENETC_SICBDRBAR0, ENETC_SICBDRBAR1, ENETC_SICBDRPIR, + ENETC_SICBDRCIR, ENETC_SICBDRLENR, ENETC_SICAPR0, ENETC_SICAPR1, + ENETC_SIUEFDCR +}; + +static const u32 enetc_txbdr_regs[] = { + ENETC_TBMR, ENETC_TBSR, ENETC_TBBAR0, ENETC_TBBAR1, + ENETC_TBPIR, ENETC_TBCIR, ENETC_TBLENR, ENETC_TBIER +}; + +static const u32 enetc_rxbdr_regs[] = { + ENETC_RBMR, ENETC_RBSR, ENETC_RBBSR, ENETC_RBCIR, ENETC_RBBAR0, + ENETC_RBBAR1, ENETC_RBPIR, ENETC_RBLENR, ENETC_RBICIR0, ENETC_RBIER +}; + +static const u32 enetc_port_regs[] = { + ENETC_PMR, ENETC_PSR, ENETC_PSIPMR, ENETC_PSIPMAR0(0), + ENETC_PSIPMAR1(0), ENETC_PTXMBAR, ENETC_PCAPR0, ENETC_PCAPR1, + ENETC_PSICFGR0(0), ENETC_PRFSCAPR, ENETC_PTCMSDUR(0), + ENETC_PM0_CMD_CFG, ENETC_PM0_MAXFRM, ENETC_PM0_IF_MODE +}; + +static int enetc_get_reglen(struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + int len; + + len = ARRAY_SIZE(enetc_si_regs); + len += ARRAY_SIZE(enetc_txbdr_regs) * priv->num_tx_rings; + len += ARRAY_SIZE(enetc_rxbdr_regs) * priv->num_rx_rings; + + if (hw->port) + len += ARRAY_SIZE(enetc_port_regs); + + len *= sizeof(u32) * 2; /* store 2 entries per reg: addr and value */ + + return len; +} + +static void enetc_get_regs(struct net_device *ndev, struct ethtool_regs *regs, + void *regbuf) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + u32 *buf = (u32 *)regbuf; + int i, j; + u32 addr; + + for (i = 0; i < ARRAY_SIZE(enetc_si_regs); i++) { + *buf++ = enetc_si_regs[i]; + *buf++ = enetc_rd(hw, enetc_si_regs[i]); + } + + for (i = 0; i < priv->num_tx_rings; i++) { + for (j = 0; j < ARRAY_SIZE(enetc_txbdr_regs); j++) { + addr = ENETC_BDR(TX, i, enetc_txbdr_regs[j]); + + *buf++ = addr; + *buf++ = enetc_rd(hw, addr); + } + } + + for (i = 0; i < priv->num_rx_rings; i++) { + for (j = 0; j < ARRAY_SIZE(enetc_rxbdr_regs); j++) { + addr = ENETC_BDR(RX, i, enetc_rxbdr_regs[j]); + + *buf++ = addr; + *buf++ = enetc_rd(hw, addr); + } + } + + if (!hw->port) + return; + + for (i = 0; i < ARRAY_SIZE(enetc_port_regs); i++) { + addr = ENETC_PORT_BASE + enetc_port_regs[i]; + *buf++ = addr; + *buf++ = enetc_rd(hw, addr); + } +} + +static const struct { + int reg; + char name[ETH_GSTRING_LEN]; +} enetc_si_counters[] = { + { ENETC_SIROCT, "SI rx octets" }, + { ENETC_SIRFRM, "SI rx frames" }, + { ENETC_SIRUCA, "SI rx u-cast frames" }, + { ENETC_SIRMCA, "SI rx m-cast frames" }, + { ENETC_SITOCT, "SI tx octets" }, + { ENETC_SITFRM, "SI tx frames" }, + { ENETC_SITUCA, "SI tx u-cast frames" }, + { ENETC_SITMCA, "SI tx m-cast frames" }, + { ENETC_RBDCR(0), "Rx ring 0 discarded frames" }, + { ENETC_RBDCR(1), "Rx ring 1 discarded frames" }, + { ENETC_RBDCR(2), "Rx ring 2 discarded frames" }, + { ENETC_RBDCR(3), "Rx ring 3 discarded frames" }, + { ENETC_RBDCR(4), "Rx ring 4 discarded frames" }, + { ENETC_RBDCR(5), "Rx ring 5 discarded frames" }, + { ENETC_RBDCR(6), "Rx ring 6 discarded frames" }, + { ENETC_RBDCR(7), "Rx ring 7 discarded frames" }, + { ENETC_RBDCR(8), "Rx ring 8 discarded frames" }, + { ENETC_RBDCR(9), "Rx ring 9 discarded frames" }, + { ENETC_RBDCR(10), "Rx ring 10 discarded frames" }, + { ENETC_RBDCR(11), "Rx ring 11 discarded frames" }, + { ENETC_RBDCR(12), "Rx ring 12 discarded frames" }, + { ENETC_RBDCR(13), "Rx ring 13 discarded frames" }, + { ENETC_RBDCR(14), "Rx ring 14 discarded frames" }, + { ENETC_RBDCR(15), "Rx ring 15 discarded frames" }, +}; + +static const struct { + int reg; + char name[ETH_GSTRING_LEN]; +} enetc_port_counters[] = { + { ENETC_PM0_REOCT, "MAC rx ethernet octets" }, + { ENETC_PM0_RALN, "MAC rx alignment errors" }, + { ENETC_PM0_RXPF, "MAC rx valid pause frames" }, + { ENETC_PM0_RFRM, "MAC rx valid frames" }, + { ENETC_PM0_RFCS, "MAC rx fcs errors" }, + { ENETC_PM0_RVLAN, "MAC rx VLAN frames" }, + { ENETC_PM0_RERR, "MAC rx frame errors" }, + { ENETC_PM0_RUCA, "MAC rx unicast frames" }, + { ENETC_PM0_RMCA, "MAC rx multicast frames" }, + { ENETC_PM0_RBCA, "MAC rx broadcast frames" }, + { ENETC_PM0_RDRP, "MAC rx dropped packets" }, + { ENETC_PM0_RPKT, "MAC rx packets" }, + { ENETC_PM0_RUND, "MAC rx undersized packets" }, + { ENETC_PM0_R64, "MAC rx 64 byte packets" }, + { ENETC_PM0_R127, "MAC rx 65-127 byte packets" }, + { ENETC_PM0_R255, "MAC rx 128-255 byte packets" }, + { ENETC_PM0_R511, "MAC rx 256-511 byte packets" }, + { ENETC_PM0_R1023, "MAC rx 512-1023 byte packets" }, + { ENETC_PM0_R1518, "MAC rx 1024-1518 byte packets" }, + { ENETC_PM0_R1519X, "MAC rx 1519 to max-octet packets" }, + { ENETC_PM0_ROVR, "MAC rx oversized packets" }, + { ENETC_PM0_RJBR, "MAC rx jabber packets" }, + { ENETC_PM0_RFRG, "MAC rx fragment packets" }, + { ENETC_PM0_RCNP, "MAC rx control packets" }, + { ENETC_PM0_RDRNTP, "MAC rx fifo drop" }, + { ENETC_PM0_TEOCT, "MAC tx ethernet octets" }, + { ENETC_PM0_TOCT, "MAC tx octets" }, + { ENETC_PM0_TCRSE, "MAC tx carrier sense errors" }, + { ENETC_PM0_TXPF, "MAC tx valid pause frames" }, + { ENETC_PM0_TFRM, "MAC tx frames" }, + { ENETC_PM0_TFCS, "MAC tx fcs errors" }, + { ENETC_PM0_TVLAN, "MAC tx VLAN frames" }, + { ENETC_PM0_TERR, "MAC tx frames" }, + { ENETC_PM0_TUCA, "MAC tx unicast frames" }, + { ENETC_PM0_TMCA, "MAC tx multicast frames" }, + { ENETC_PM0_TBCA, "MAC tx broadcast frames" }, + { ENETC_PM0_TPKT, "MAC tx packets" }, + { ENETC_PM0_TUND, "MAC tx undersized packets" }, + { ENETC_PM0_T127, "MAC tx 65-127 byte packets" }, + { ENETC_PM0_T1023, "MAC tx 512-1023 byte packets" }, + { ENETC_PM0_T1518, "MAC tx 1024-1518 byte packets" }, + { ENETC_PM0_TCNP, "MAC tx control packets" }, + { ENETC_PM0_TDFR, "MAC tx deferred packets" }, + { ENETC_PM0_TMCOL, "MAC tx multiple collisions" }, + { ENETC_PM0_TSCOL, "MAC tx single collisions" }, + { ENETC_PM0_TLCOL, "MAC tx late collisions" }, + { ENETC_PM0_TECOL, "MAC tx excessive collisions" }, + { ENETC_UFDMF, "SI MAC nomatch u-cast discards" }, + { ENETC_MFDMF, "SI MAC nomatch m-cast discards" }, + { ENETC_PBFDSIR, "SI MAC nomatch b-cast discards" }, + { ENETC_PUFDVFR, "SI VLAN nomatch u-cast discards" }, + { ENETC_PMFDVFR, "SI VLAN nomatch m-cast discards" }, + { ENETC_PBFDVFR, "SI VLAN nomatch b-cast discards" }, + { ENETC_PFDMSAPR, "SI pruning discarded frames" }, + { ENETC_PICDR(0), "ICM DR0 discarded frames" }, + { ENETC_PICDR(1), "ICM DR1 discarded frames" }, + { ENETC_PICDR(2), "ICM DR2 discarded frames" }, + { ENETC_PICDR(3), "ICM DR3 discarded frames" }, +}; + +static const char rx_ring_stats[][ETH_GSTRING_LEN] = { + "Rx ring %2d frames", + "Rx ring %2d alloc errors", +}; + +static const char tx_ring_stats[][ETH_GSTRING_LEN] = { + "Tx ring %2d frames", +}; + +static int enetc_get_sset_count(struct net_device *ndev, int sset) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + + if (sset == ETH_SS_STATS) + return ARRAY_SIZE(enetc_si_counters) + + ARRAY_SIZE(tx_ring_stats) * priv->num_tx_rings + + ARRAY_SIZE(rx_ring_stats) * priv->num_rx_rings + + (enetc_si_is_pf(priv->si) ? + ARRAY_SIZE(enetc_port_counters) : 0); + + return -EOPNOTSUPP; +} + +static void enetc_get_strings(struct net_device *ndev, u32 stringset, u8 *data) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + u8 *p = data; + int i, j; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < ARRAY_SIZE(enetc_si_counters); i++) { + strlcpy(p, enetc_si_counters[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < priv->num_tx_rings; i++) { + for (j = 0; j < ARRAY_SIZE(tx_ring_stats); j++) { + snprintf(p, ETH_GSTRING_LEN, tx_ring_stats[j], + i); + p += ETH_GSTRING_LEN; + } + } + for (i = 0; i < priv->num_rx_rings; i++) { + for (j = 0; j < ARRAY_SIZE(rx_ring_stats); j++) { + snprintf(p, ETH_GSTRING_LEN, rx_ring_stats[j], + i); + p += ETH_GSTRING_LEN; + } + } + + if (!enetc_si_is_pf(priv->si)) + break; + + for (i = 0; i < ARRAY_SIZE(enetc_port_counters); i++) { + strlcpy(p, enetc_port_counters[i].name, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + break; + } +} + +static void enetc_get_ethtool_stats(struct net_device *ndev, + struct ethtool_stats *stats, u64 *data) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + int i, o = 0; + + for (i = 0; i < ARRAY_SIZE(enetc_si_counters); i++) + data[o++] = enetc_rd64(hw, enetc_si_counters[i].reg); + + for (i = 0; i < priv->num_tx_rings; i++) + data[o++] = priv->tx_ring[i]->stats.packets; + + for (i = 0; i < priv->num_rx_rings; i++) { + data[o++] = priv->rx_ring[i]->stats.packets; + data[o++] = priv->rx_ring[i]->stats.rx_alloc_errs; + } + + if (!enetc_si_is_pf(priv->si)) + return; + + for (i = 0; i < ARRAY_SIZE(enetc_port_counters); i++) + data[o++] = enetc_port_rd(hw, enetc_port_counters[i].reg); +} + +#define ENETC_RSSHASH_L3 (RXH_L2DA | RXH_VLAN | RXH_L3_PROTO | RXH_IP_SRC | \ + RXH_IP_DST) +#define ENETC_RSSHASH_L4 (ENETC_RSSHASH_L3 | RXH_L4_B_0_1 | RXH_L4_B_2_3) +static int enetc_get_rsshash(struct ethtool_rxnfc *rxnfc) +{ + static const u32 rsshash[] = { + [TCP_V4_FLOW] = ENETC_RSSHASH_L4, + [UDP_V4_FLOW] = ENETC_RSSHASH_L4, + [SCTP_V4_FLOW] = ENETC_RSSHASH_L4, + [AH_ESP_V4_FLOW] = ENETC_RSSHASH_L3, + [IPV4_FLOW] = ENETC_RSSHASH_L3, + [TCP_V6_FLOW] = ENETC_RSSHASH_L4, + [UDP_V6_FLOW] = ENETC_RSSHASH_L4, + [SCTP_V6_FLOW] = ENETC_RSSHASH_L4, + [AH_ESP_V6_FLOW] = ENETC_RSSHASH_L3, + [IPV6_FLOW] = ENETC_RSSHASH_L3, + [ETHER_FLOW] = 0, + }; + + if (rxnfc->flow_type >= ARRAY_SIZE(rsshash)) + return -EINVAL; + + rxnfc->data = rsshash[rxnfc->flow_type]; + + return 0; +} + +/* current HW spec does byte reversal on everything including MAC addresses */ +static void ether_addr_copy_swap(u8 *dst, const u8 *src) +{ + int i; + + for (i = 0; i < ETH_ALEN; i++) + dst[i] = src[ETH_ALEN - i - 1]; +} + +static int enetc_set_cls_entry(struct enetc_si *si, + struct ethtool_rx_flow_spec *fs, bool en) +{ + struct ethtool_tcpip4_spec *l4ip4_h, *l4ip4_m; + struct ethtool_usrip4_spec *l3ip4_h, *l3ip4_m; + struct ethhdr *eth_h, *eth_m; + struct enetc_cmd_rfse rfse = { {0} }; + + if (!en) + goto done; + + switch (fs->flow_type & 0xff) { + case TCP_V4_FLOW: + l4ip4_h = &fs->h_u.tcp_ip4_spec; + l4ip4_m = &fs->m_u.tcp_ip4_spec; + goto l4ip4; + case UDP_V4_FLOW: + l4ip4_h = &fs->h_u.udp_ip4_spec; + l4ip4_m = &fs->m_u.udp_ip4_spec; + goto l4ip4; + case SCTP_V4_FLOW: + l4ip4_h = &fs->h_u.sctp_ip4_spec; + l4ip4_m = &fs->m_u.sctp_ip4_spec; +l4ip4: + rfse.sip_h[0] = l4ip4_h->ip4src; + rfse.sip_m[0] = l4ip4_m->ip4src; + rfse.dip_h[0] = l4ip4_h->ip4dst; + rfse.dip_m[0] = l4ip4_m->ip4dst; + rfse.sport_h = ntohs(l4ip4_h->psrc); + rfse.sport_m = ntohs(l4ip4_m->psrc); + rfse.dport_h = ntohs(l4ip4_h->pdst); + rfse.dport_m = ntohs(l4ip4_m->pdst); + if (l4ip4_m->tos) + netdev_warn(si->ndev, "ToS field is not supported and was ignored\n"); + rfse.ethtype_h = ETH_P_IP; /* IPv4 */ + rfse.ethtype_m = 0xffff; + break; + case IP_USER_FLOW: + l3ip4_h = &fs->h_u.usr_ip4_spec; + l3ip4_m = &fs->m_u.usr_ip4_spec; + + rfse.sip_h[0] = l3ip4_h->ip4src; + rfse.sip_m[0] = l3ip4_m->ip4src; + rfse.dip_h[0] = l3ip4_h->ip4dst; + rfse.dip_m[0] = l3ip4_m->ip4dst; + if (l3ip4_m->tos) + netdev_warn(si->ndev, "ToS field is not supported and was ignored\n"); + rfse.ethtype_h = ETH_P_IP; /* IPv4 */ + rfse.ethtype_m = 0xffff; + break; + case ETHER_FLOW: + eth_h = &fs->h_u.ether_spec; + eth_m = &fs->m_u.ether_spec; + + ether_addr_copy_swap(rfse.smac_h, eth_h->h_source); + ether_addr_copy_swap(rfse.smac_m, eth_m->h_source); + ether_addr_copy_swap(rfse.dmac_h, eth_h->h_dest); + ether_addr_copy_swap(rfse.dmac_m, eth_m->h_dest); + rfse.ethtype_h = ntohs(eth_h->h_proto); + rfse.ethtype_m = ntohs(eth_m->h_proto); + break; + default: + return -EOPNOTSUPP; + } + + rfse.mode |= ENETC_RFSE_EN; + if (fs->ring_cookie != RX_CLS_FLOW_DISC) { + rfse.mode |= ENETC_RFSE_MODE_BD; + rfse.result = fs->ring_cookie; + } +done: + return enetc_set_fs_entry(si, &rfse, fs->location); +} + +static int enetc_get_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *rxnfc, + u32 *rule_locs) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + int i, j; + + switch (rxnfc->cmd) { + case ETHTOOL_GRXRINGS: + rxnfc->data = priv->num_rx_rings; + break; + case ETHTOOL_GRXFH: + /* get RSS hash config */ + return enetc_get_rsshash(rxnfc); + case ETHTOOL_GRXCLSRLCNT: + /* total number of entries */ + rxnfc->data = priv->si->num_fs_entries; + /* number of entries in use */ + rxnfc->rule_cnt = 0; + for (i = 0; i < priv->si->num_fs_entries; i++) + if (priv->cls_rules[i].used) + rxnfc->rule_cnt++; + break; + case ETHTOOL_GRXCLSRULE: + if (rxnfc->fs.location >= priv->si->num_fs_entries) + return -EINVAL; + + /* get entry x */ + rxnfc->fs = priv->cls_rules[rxnfc->fs.location].fs; + break; + case ETHTOOL_GRXCLSRLALL: + /* total number of entries */ + rxnfc->data = priv->si->num_fs_entries; + /* array of indexes of used entries */ + j = 0; + for (i = 0; i < priv->si->num_fs_entries; i++) { + if (!priv->cls_rules[i].used) + continue; + if (j == rxnfc->rule_cnt) + return -EMSGSIZE; + rule_locs[j++] = i; + } + /* number of entries in use */ + rxnfc->rule_cnt = j; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int enetc_set_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *rxnfc) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + int err; + + switch (rxnfc->cmd) { + case ETHTOOL_SRXCLSRLINS: + if (rxnfc->fs.location >= priv->si->num_fs_entries) + return -EINVAL; + + if (rxnfc->fs.ring_cookie >= priv->num_rx_rings && + rxnfc->fs.ring_cookie != RX_CLS_FLOW_DISC) + return -EINVAL; + + err = enetc_set_cls_entry(priv->si, &rxnfc->fs, true); + if (err) + return err; + priv->cls_rules[rxnfc->fs.location].fs = rxnfc->fs; + priv->cls_rules[rxnfc->fs.location].used = 1; + break; + case ETHTOOL_SRXCLSRLDEL: + if (rxnfc->fs.location >= priv->si->num_fs_entries) + return -EINVAL; + + err = enetc_set_cls_entry(priv->si, &rxnfc->fs, false); + if (err) + return err; + priv->cls_rules[rxnfc->fs.location].used = 0; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static u32 enetc_get_rxfh_key_size(struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + + /* return the size of the RX flow hash key. PF only */ + return (priv->si->hw.port) ? ENETC_RSSHASH_KEY_SIZE : 0; +} + +static u32 enetc_get_rxfh_indir_size(struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + + /* return the size of the RX flow hash indirection table */ + return priv->si->num_rss; +} + +static int enetc_get_rxfh(struct net_device *ndev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + int err = 0, i; + + /* return hash function */ + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + /* return hash key */ + if (key && hw->port) + for (i = 0; i < ENETC_RSSHASH_KEY_SIZE / 4; i++) + ((u32 *)key)[i] = enetc_port_rd(hw, ENETC_PRSSK(i)); + + /* return RSS table */ + if (indir) + err = enetc_get_rss_table(priv->si, indir, priv->si->num_rss); + + return err; +} + +void enetc_set_rss_key(struct enetc_hw *hw, const u8 *bytes) +{ + int i; + + for (i = 0; i < ENETC_RSSHASH_KEY_SIZE / 4; i++) + enetc_port_wr(hw, ENETC_PRSSK(i), ((u32 *)bytes)[i]); +} + +static int enetc_set_rxfh(struct net_device *ndev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + int err = 0; + + /* set hash key, if PF */ + if (key && hw->port) + enetc_set_rss_key(hw, key); + + /* set RSS table */ + if (indir) + err = enetc_set_rss_table(priv->si, indir, priv->si->num_rss); + + return err; +} + +static void enetc_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ring) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + + ring->rx_pending = priv->rx_bd_count; + ring->tx_pending = priv->tx_bd_count; + + /* do some h/w sanity checks for BDR length */ + if (netif_running(ndev)) { + struct enetc_hw *hw = &priv->si->hw; + u32 val = enetc_rxbdr_rd(hw, 0, ENETC_RBLENR); + + if (val != priv->rx_bd_count) + netif_err(priv, hw, ndev, "RxBDR[RBLENR] = %d!\n", val); + + val = enetc_txbdr_rd(hw, 0, ENETC_TBLENR); + + if (val != priv->tx_bd_count) + netif_err(priv, hw, ndev, "TxBDR[TBLENR] = %d!\n", val); + } +} + +static const struct ethtool_ops enetc_pf_ethtool_ops = { + .get_regs_len = enetc_get_reglen, + .get_regs = enetc_get_regs, + .get_sset_count = enetc_get_sset_count, + .get_strings = enetc_get_strings, + .get_ethtool_stats = enetc_get_ethtool_stats, + .get_rxnfc = enetc_get_rxnfc, + .set_rxnfc = enetc_set_rxnfc, + .get_rxfh_key_size = enetc_get_rxfh_key_size, + .get_rxfh_indir_size = enetc_get_rxfh_indir_size, + .get_rxfh = enetc_get_rxfh, + .set_rxfh = enetc_set_rxfh, + .get_ringparam = enetc_get_ringparam, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, +}; + +static const struct ethtool_ops enetc_vf_ethtool_ops = { + .get_regs_len = enetc_get_reglen, + .get_regs = enetc_get_regs, + .get_sset_count = enetc_get_sset_count, + .get_strings = enetc_get_strings, + .get_ethtool_stats = enetc_get_ethtool_stats, + .get_rxnfc = enetc_get_rxnfc, + .set_rxnfc = enetc_set_rxnfc, + .get_rxfh_indir_size = enetc_get_rxfh_indir_size, + .get_rxfh = enetc_get_rxfh, + .set_rxfh = enetc_set_rxfh, + .get_ringparam = enetc_get_ringparam, +}; + +void enetc_set_ethtool_ops(struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + + if (enetc_si_is_pf(priv->si)) + ndev->ethtool_ops = &enetc_pf_ethtool_ops; + else + ndev->ethtool_ops = &enetc_vf_ethtool_ops; +} diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h new file mode 100644 index 000000000000..efa0b1a5ef4f --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -0,0 +1,532 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2017-2019 NXP */ + +#include + +/* ENETC device IDs */ +#define ENETC_DEV_ID_PF 0xe100 +#define ENETC_DEV_ID_VF 0xef00 + +/* ENETC register block BAR */ +#define ENETC_BAR_REGS 0 + +/** SI regs, offset: 0h */ +#define ENETC_SIMR 0 +#define ENETC_SIMR_EN BIT(31) +#define ENETC_SIMR_RSSE BIT(0) +#define ENETC_SICTR0 0x18 +#define ENETC_SICTR1 0x1c +#define ENETC_SIPCAPR0 0x20 +#define ENETC_SIPCAPR0_RSS BIT(8) +#define ENETC_SIPCAPR1 0x24 +#define ENETC_SITGTGR 0x30 +#define ENETC_SIRBGCR 0x38 +/* cache attribute registers for transactions initiated by ENETC */ +#define ENETC_SICAR0 0x40 +#define ENETC_SICAR1 0x44 +#define ENETC_SICAR2 0x48 +/* rd snoop, no alloc + * wr snoop, no alloc, partial cache line update for BDs and full cache line + * update for data + */ +#define ENETC_SICAR_RD_COHERENT 0x2b2b0000 +#define ENETC_SICAR_WR_COHERENT 0x00006727 +#define ENETC_SICAR_MSI 0x00300030 /* rd/wr device, no snoop, no alloc */ + +#define ENETC_SIPMAR0 0x80 +#define ENETC_SIPMAR1 0x84 + +/* VF-PF Message passing */ +#define ENETC_DEFAULT_MSG_SIZE 1024 /* and max size */ +/* msg size encoding: default and max msg value of 1024B encoded as 0 */ +static inline u32 enetc_vsi_set_msize(u32 size) +{ + return size < ENETC_DEFAULT_MSG_SIZE ? size >> 5 : 0; +} + +#define ENETC_PSIMSGRR 0x204 +#define ENETC_PSIMSGRR_MR_MASK GENMASK(2, 1) +#define ENETC_PSIMSGRR_MR(n) BIT((n) + 1) /* n = VSI index */ +#define ENETC_PSIVMSGRCVAR0(n) (0x210 + (n) * 0x8) /* n = VSI index */ +#define ENETC_PSIVMSGRCVAR1(n) (0x214 + (n) * 0x8) + +#define ENETC_VSIMSGSR 0x204 /* RO */ +#define ENETC_VSIMSGSR_MB BIT(0) +#define ENETC_VSIMSGSR_MS BIT(1) +#define ENETC_VSIMSGSNDAR0 0x210 +#define ENETC_VSIMSGSNDAR1 0x214 + +#define ENETC_SIMSGSR_SET_MC(val) ((val) << 16) +#define ENETC_SIMSGSR_GET_MC(val) ((val) >> 16) + +/* SI statistics */ +#define ENETC_SIROCT 0x300 +#define ENETC_SIRFRM 0x308 +#define ENETC_SIRUCA 0x310 +#define ENETC_SIRMCA 0x318 +#define ENETC_SITOCT 0x320 +#define ENETC_SITFRM 0x328 +#define ENETC_SITUCA 0x330 +#define ENETC_SITMCA 0x338 +#define ENETC_RBDCR(n) (0x8180 + (n) * 0x200) + +/* Control BDR regs */ +#define ENETC_SICBDRMR 0x800 +#define ENETC_SICBDRSR 0x804 /* RO */ +#define ENETC_SICBDRBAR0 0x810 +#define ENETC_SICBDRBAR1 0x814 +#define ENETC_SICBDRPIR 0x818 +#define ENETC_SICBDRCIR 0x81c +#define ENETC_SICBDRLENR 0x820 + +#define ENETC_SICAPR0 0x900 +#define ENETC_SICAPR1 0x904 + +#define ENETC_PSIIER 0xa00 +#define ENETC_PSIIER_MR_MASK GENMASK(2, 1) +#define ENETC_PSIIDR 0xa08 +#define ENETC_SITXIDR 0xa18 +#define ENETC_SIRXIDR 0xa28 +#define ENETC_SIMSIVR 0xa30 + +#define ENETC_SIMSITRV(n) (0xB00 + (n) * 0x4) +#define ENETC_SIMSIRRV(n) (0xB80 + (n) * 0x4) + +#define ENETC_SIUEFDCR 0xe28 + +#define ENETC_SIRFSCAPR 0x1200 +#define ENETC_SIRFSCAPR_GET_NUM_RFS(val) ((val) & 0x7f) +#define ENETC_SIRSSCAPR 0x1600 +#define ENETC_SIRSSCAPR_GET_NUM_RSS(val) (BIT((val) & 0xf) * 32) + +/** SI BDR sub-blocks, n = 0..7 */ +enum enetc_bdr_type {TX, RX}; +#define ENETC_BDR_OFF(i) ((i) * 0x200) +#define ENETC_BDR(t, i, r) (0x8000 + (t) * 0x100 + ENETC_BDR_OFF(i) + (r)) +/* RX BDR reg offsets */ +#define ENETC_RBMR 0 +#define ENETC_RBMR_BDS BIT(2) +#define ENETC_RBMR_VTE BIT(5) +#define ENETC_RBMR_EN BIT(31) +#define ENETC_RBSR 0x4 +#define ENETC_RBBSR 0x8 +#define ENETC_RBCIR 0xc +#define ENETC_RBBAR0 0x10 +#define ENETC_RBBAR1 0x14 +#define ENETC_RBPIR 0x18 +#define ENETC_RBLENR 0x20 +#define ENETC_RBIER 0xa0 +#define ENETC_RBIER_RXTIE BIT(0) +#define ENETC_RBIDR 0xa4 +#define ENETC_RBICIR0 0xa8 +#define ENETC_RBICIR0_ICEN BIT(31) + +/* TX BDR reg offsets */ +#define ENETC_TBMR 0 +#define ENETC_TBSR_BUSY BIT(0) +#define ENETC_TBMR_VIH BIT(9) +#define ENETC_TBMR_PRIO_MASK GENMASK(2, 0) +#define ENETC_TBMR_PRIO_SET(val) val +#define ENETC_TBMR_EN BIT(31) +#define ENETC_TBSR 0x4 +#define ENETC_TBBAR0 0x10 +#define ENETC_TBBAR1 0x14 +#define ENETC_TBPIR 0x18 +#define ENETC_TBCIR 0x1c +#define ENETC_TBCIR_IDX_MASK 0xffff +#define ENETC_TBLENR 0x20 +#define ENETC_TBIER 0xa0 +#define ENETC_TBIER_TXTIE BIT(0) +#define ENETC_TBIDR 0xa4 +#define ENETC_TBICIR0 0xa8 +#define ENETC_TBICIR0_ICEN BIT(31) + +#define ENETC_RTBLENR_LEN(n) ((n) & ~0x7) + +/* Port regs, offset: 1_0000h */ +#define ENETC_PORT_BASE 0x10000 +#define ENETC_PMR 0x0000 +#define ENETC_PMR_EN GENMASK(18, 16) +#define ENETC_PSR 0x0004 /* RO */ +#define ENETC_PSIPMR 0x0018 +#define ENETC_PSIPMR_SET_UP(n) BIT(n) /* n = SI index */ +#define ENETC_PSIPMR_SET_MP(n) BIT((n) + 16) +#define ENETC_PSIPVMR 0x001c +#define ENETC_VLAN_PROMISC_MAP_ALL 0x7 +#define ENETC_PSIPVMR_SET_VP(simap) ((simap) & 0x7) +#define ENETC_PSIPVMR_SET_VUTA(simap) (((simap) & 0x7) << 16) +#define ENETC_PSIPMAR0(n) (0x0100 + (n) * 0x8) /* n = SI index */ +#define ENETC_PSIPMAR1(n) (0x0104 + (n) * 0x8) +#define ENETC_PVCLCTR 0x0208 +#define ENETC_VLAN_TYPE_C BIT(0) +#define ENETC_VLAN_TYPE_S BIT(1) +#define ENETC_PVCLCTR_OVTPIDL(bmp) ((bmp) & 0xff) /* VLAN_TYPE */ +#define ENETC_PSIVLANR(n) (0x0240 + (n) * 4) /* n = SI index */ +#define ENETC_PSIVLAN_EN BIT(31) +#define ENETC_PSIVLAN_SET_QOS(val) ((u32)(val) << 12) +#define ENETC_PTXMBAR 0x0608 +#define ENETC_PCAPR0 0x0900 +#define ENETC_PCAPR0_RXBDR(val) ((val) >> 24) +#define ENETC_PCAPR0_TXBDR(val) (((val) >> 16) & 0xff) +#define ENETC_PCAPR1 0x0904 +#define ENETC_PSICFGR0(n) (0x0940 + (n) * 0xc) /* n = SI index */ +#define ENETC_PSICFGR0_SET_TXBDR(val) ((val) & 0xff) +#define ENETC_PSICFGR0_SET_RXBDR(val) (((val) & 0xff) << 16) +#define ENETC_PSICFGR0_VTE BIT(12) +#define ENETC_PSICFGR0_SIVIE BIT(14) +#define ENETC_PSICFGR0_ASE BIT(15) +#define ENETC_PSICFGR0_SIVC(bmp) (((bmp) & 0xff) << 24) /* VLAN_TYPE */ + +#define ENETC_PTCCBSR0(n) (0x1110 + (n) * 8) /* n = 0 to 7*/ +#define ENETC_PTCCBSR1(n) (0x1114 + (n) * 8) /* n = 0 to 7*/ +#define ENETC_RSSHASH_KEY_SIZE 40 +#define ENETC_PRSSK(n) (0x1410 + (n) * 4) /* n = [0..9] */ +#define ENETC_PSIVLANFMR 0x1700 +#define ENETC_PSIVLANFMR_VS BIT(0) +#define ENETC_PRFSMR 0x1800 +#define ENETC_PRFSMR_RFSE BIT(31) +#define ENETC_PRFSCAPR 0x1804 +#define ENETC_PRFSCAPR_GET_NUM_RFS(val) ((((val) & 0xf) + 1) * 16) +#define ENETC_PSIRFSCFGR(n) (0x1814 + (n) * 4) /* n = SI index */ +#define ENETC_PFPMR 0x1900 +#define ENETC_PFPMR_PMACE BIT(1) +#define ENETC_PFPMR_MWLM BIT(0) +#define ENETC_PSIUMHFR0(n, err) (((err) ? 0x1d08 : 0x1d00) + (n) * 0x10) +#define ENETC_PSIUMHFR1(n) (0x1d04 + (n) * 0x10) +#define ENETC_PSIMMHFR0(n, err) (((err) ? 0x1d00 : 0x1d08) + (n) * 0x10) +#define ENETC_PSIMMHFR1(n) (0x1d0c + (n) * 0x10) +#define ENETC_PSIVHFR0(n) (0x1e00 + (n) * 8) /* n = SI index */ +#define ENETC_PSIVHFR1(n) (0x1e04 + (n) * 8) /* n = SI index */ +#define ENETC_MMCSR 0x1f00 +#define ENETC_MMCSR_ME BIT(16) +#define ENETC_PTCMSDUR(n) (0x2020 + (n) * 4) /* n = TC index [0..7] */ + +#define ENETC_PM0_CMD_CFG 0x8008 +#define ENETC_PM1_CMD_CFG 0x9008 +#define ENETC_PM0_TX_EN BIT(0) +#define ENETC_PM0_RX_EN BIT(1) +#define ENETC_PM0_PROMISC BIT(4) +#define ENETC_PM0_CMD_XGLP BIT(10) +#define ENETC_PM0_CMD_TXP BIT(11) +#define ENETC_PM0_CMD_PHY_TX_EN BIT(15) +#define ENETC_PM0_CMD_SFD BIT(21) +#define ENETC_PM0_MAXFRM 0x8014 +#define ENETC_SET_TX_MTU(val) ((val) << 16) +#define ENETC_SET_MAXFRM(val) ((val) & 0xffff) +#define ENETC_PM0_IF_MODE 0x8300 +#define ENETC_PMO_IFM_RG BIT(2) +#define ENETC_PM0_IFM_RLP (BIT(5) | BIT(11)) +#define ENETC_PM0_IFM_RGAUTO (BIT(15) | ENETC_PMO_IFM_RG | BIT(1)) +#define ENETC_PM0_IFM_XGMII BIT(12) + +/* MAC counters */ +#define ENETC_PM0_REOCT 0x8100 +#define ENETC_PM0_RALN 0x8110 +#define ENETC_PM0_RXPF 0x8118 +#define ENETC_PM0_RFRM 0x8120 +#define ENETC_PM0_RFCS 0x8128 +#define ENETC_PM0_RVLAN 0x8130 +#define ENETC_PM0_RERR 0x8138 +#define ENETC_PM0_RUCA 0x8140 +#define ENETC_PM0_RMCA 0x8148 +#define ENETC_PM0_RBCA 0x8150 +#define ENETC_PM0_RDRP 0x8158 +#define ENETC_PM0_RPKT 0x8160 +#define ENETC_PM0_RUND 0x8168 +#define ENETC_PM0_R64 0x8170 +#define ENETC_PM0_R127 0x8178 +#define ENETC_PM0_R255 0x8180 +#define ENETC_PM0_R511 0x8188 +#define ENETC_PM0_R1023 0x8190 +#define ENETC_PM0_R1518 0x8198 +#define ENETC_PM0_R1519X 0x81A0 +#define ENETC_PM0_ROVR 0x81A8 +#define ENETC_PM0_RJBR 0x81B0 +#define ENETC_PM0_RFRG 0x81B8 +#define ENETC_PM0_RCNP 0x81C0 +#define ENETC_PM0_RDRNTP 0x81C8 +#define ENETC_PM0_TEOCT 0x8200 +#define ENETC_PM0_TOCT 0x8208 +#define ENETC_PM0_TCRSE 0x8210 +#define ENETC_PM0_TXPF 0x8218 +#define ENETC_PM0_TFRM 0x8220 +#define ENETC_PM0_TFCS 0x8228 +#define ENETC_PM0_TVLAN 0x8230 +#define ENETC_PM0_TERR 0x8238 +#define ENETC_PM0_TUCA 0x8240 +#define ENETC_PM0_TMCA 0x8248 +#define ENETC_PM0_TBCA 0x8250 +#define ENETC_PM0_TPKT 0x8260 +#define ENETC_PM0_TUND 0x8268 +#define ENETC_PM0_T127 0x8278 +#define ENETC_PM0_T1023 0x8290 +#define ENETC_PM0_T1518 0x8298 +#define ENETC_PM0_TCNP 0x82C0 +#define ENETC_PM0_TDFR 0x82D0 +#define ENETC_PM0_TMCOL 0x82D8 +#define ENETC_PM0_TSCOL 0x82E0 +#define ENETC_PM0_TLCOL 0x82E8 +#define ENETC_PM0_TECOL 0x82F0 + +/* Port counters */ +#define ENETC_PICDR(n) (0x0700 + (n) * 8) /* n = [0..3] */ +#define ENETC_PBFDSIR 0x0810 +#define ENETC_PFDMSAPR 0x0814 +#define ENETC_UFDMF 0x1680 +#define ENETC_MFDMF 0x1684 +#define ENETC_PUFDVFR 0x1780 +#define ENETC_PMFDVFR 0x1784 +#define ENETC_PBFDVFR 0x1788 + +/** Global regs, offset: 2_0000h */ +#define ENETC_GLOBAL_BASE 0x20000 +#define ENETC_G_EIPBRR0 0x0bf8 +#define ENETC_G_EIPBRR1 0x0bfc +#define ENETC_G_EPFBLPR(n) (0xd00 + 4 * (n)) +#define ENETC_G_EPFBLPR1_XGMII 0x80000000 + +/* PCI device info */ +struct enetc_hw { + /* SI registers, used by all PCI functions */ + void __iomem *reg; + /* Port registers, PF only */ + void __iomem *port; + /* IP global registers, PF only */ + void __iomem *global; +}; + +/* general register accessors */ +#define enetc_rd_reg(reg) ioread32((reg)) +#define enetc_wr_reg(reg, val) iowrite32((val), (reg)) +#ifdef ioread64 +#define enetc_rd_reg64(reg) ioread64((reg)) +#else +/* using this to read out stats on 32b systems */ +static inline u64 enetc_rd_reg64(void __iomem *reg) +{ + u32 low, high, tmp; + + do { + high = ioread32(reg + 4); + low = ioread32(reg); + tmp = ioread32(reg + 4); + } while (high != tmp); + + return le64_to_cpu((__le64)high << 32 | low); +} +#endif + +#define enetc_rd(hw, off) enetc_rd_reg((hw)->reg + (off)) +#define enetc_wr(hw, off, val) enetc_wr_reg((hw)->reg + (off), val) +#define enetc_rd64(hw, off) enetc_rd_reg64((hw)->reg + (off)) +/* port register accessors - PF only */ +#define enetc_port_rd(hw, off) enetc_rd_reg((hw)->port + (off)) +#define enetc_port_wr(hw, off, val) enetc_wr_reg((hw)->port + (off), val) +/* global register accessors - PF only */ +#define enetc_global_rd(hw, off) enetc_rd_reg((hw)->global + (off)) +#define enetc_global_wr(hw, off, val) enetc_wr_reg((hw)->global + (off), val) +/* BDR register accessors, see ENETC_BDR() */ +#define enetc_bdr_rd(hw, t, n, off) \ + enetc_rd(hw, ENETC_BDR(t, n, off)) +#define enetc_bdr_wr(hw, t, n, off, val) \ + enetc_wr(hw, ENETC_BDR(t, n, off), val) +#define enetc_txbdr_rd(hw, n, off) enetc_bdr_rd(hw, TX, n, off) +#define enetc_rxbdr_rd(hw, n, off) enetc_bdr_rd(hw, RX, n, off) +#define enetc_txbdr_wr(hw, n, off, val) \ + enetc_bdr_wr(hw, TX, n, off, val) +#define enetc_rxbdr_wr(hw, n, off, val) \ + enetc_bdr_wr(hw, RX, n, off, val) + +/* Buffer Descriptors (BD) */ +union enetc_tx_bd { + struct { + __le64 addr; + __le16 buf_len; + __le16 frm_len; + union { + struct { + __le16 l3_csoff; + u8 l4_csoff; + u8 flags; + }; /* default layout */ + __le32 lstatus; + }; + }; + struct { + __le32 tstamp; + __le16 tpid; + __le16 vid; + u8 reserved[6]; + u8 e_flags; + u8 flags; + } ext; /* Tx BD extension */ +}; + +#define ENETC_TXBD_FLAGS_L4CS BIT(0) +#define ENETC_TXBD_FLAGS_W BIT(2) +#define ENETC_TXBD_FLAGS_CSUM BIT(3) +#define ENETC_TXBD_FLAGS_EX BIT(6) +#define ENETC_TXBD_FLAGS_F BIT(7) + +static inline void enetc_clear_tx_bd(union enetc_tx_bd *txbd) +{ + memset(txbd, 0, sizeof(*txbd)); +} + +/* L3 csum flags */ +#define ENETC_TXBD_L3_IPCS BIT(7) +#define ENETC_TXBD_L3_IPV6 BIT(15) + +#define ENETC_TXBD_L3_START_MASK GENMASK(6, 0) +#define ENETC_TXBD_L3_SET_HSIZE(val) ((((val) >> 2) & 0x7f) << 8) + +/* Extension flags */ +#define ENETC_TXBD_E_FLAGS_VLAN_INS BIT(0) +#define ENETC_TXBD_E_FLAGS_TWO_STEP_PTP BIT(2) + +static inline __le16 enetc_txbd_l3_csoff(int start, int hdr_sz, u16 l3_flags) +{ + return cpu_to_le16(l3_flags | ENETC_TXBD_L3_SET_HSIZE(hdr_sz) | + (start & ENETC_TXBD_L3_START_MASK)); +} + +/* L4 csum flags */ +#define ENETC_TXBD_L4_UDP BIT(5) +#define ENETC_TXBD_L4_TCP BIT(6) + +union enetc_rx_bd { + struct { + __le64 addr; + u8 reserved[8]; + } w; + struct { + __le16 inet_csum; + __le16 parse_summary; + __le32 rss_hash; + __le16 buf_len; + __le16 vlan_opt; + union { + struct { + __le16 flags; + __le16 error; + }; + __le32 lstatus; + }; + } r; +}; + +#define ENETC_RXBD_LSTATUS_R BIT(30) +#define ENETC_RXBD_LSTATUS_F BIT(31) +#define ENETC_RXBD_ERR_MASK 0xff +#define ENETC_RXBD_LSTATUS(flags) ((flags) << 16) +#define ENETC_RXBD_FLAG_VLAN BIT(9) +#define ENETC_RXBD_FLAG_TSTMP BIT(10) + +#define ENETC_MAC_ADDR_FILT_CNT 8 /* # of supported entries per port */ +#define EMETC_MAC_ADDR_FILT_RES 3 /* # of reserved entries at the beginning */ +#define ENETC_MAX_NUM_VFS 2 + +struct enetc_cbd { + union { + struct { + __le32 addr[2]; + __le32 opt[4]; + }; + __le32 data[6]; + }; + __le16 index; + __le16 length; + u8 cmd; + u8 cls; + u8 _res; + u8 status_flags; +}; + +#define ENETC_CBD_FLAGS_SF BIT(7) /* short format */ +#define ENETC_CBD_STATUS_MASK 0xf + +struct enetc_cmd_rfse { + u8 smac_h[6]; + u8 smac_m[6]; + u8 dmac_h[6]; + u8 dmac_m[6]; + u32 sip_h[4]; + u32 sip_m[4]; + u32 dip_h[4]; + u32 dip_m[4]; + u16 ethtype_h; + u16 ethtype_m; + u16 ethtype4_h; + u16 ethtype4_m; + u16 sport_h; + u16 sport_m; + u16 dport_h; + u16 dport_m; + u16 vlan_h; + u16 vlan_m; + u8 proto_h; + u8 proto_m; + u16 flags; + u16 result; + u16 mode; +}; + +#define ENETC_RFSE_EN BIT(15) +#define ENETC_RFSE_MODE_BD 2 + +static inline void enetc_get_primary_mac_addr(struct enetc_hw *hw, u8 *addr) +{ + *(u32 *)addr = __raw_readl(hw->reg + ENETC_SIPMAR0); + *(u16 *)(addr + 4) = __raw_readw(hw->reg + ENETC_SIPMAR1); +} + +#define ENETC_SI_INT_IDX 0 +/* base index for Rx/Tx interrupts */ +#define ENETC_BDR_INT_BASE_IDX 1 + +/* Messaging */ + +/* Command completion status */ +enum enetc_msg_cmd_status { + ENETC_MSG_CMD_STATUS_OK, + ENETC_MSG_CMD_STATUS_FAIL +}; + +/* VSI-PSI command message types */ +enum enetc_msg_cmd_type { + ENETC_MSG_CMD_MNG_MAC = 1, /* manage MAC address */ + ENETC_MSG_CMD_MNG_RX_MAC_FILTER,/* manage RX MAC table */ + ENETC_MSG_CMD_MNG_RX_VLAN_FILTER /* manage RX VLAN table */ +}; + +/* VSI-PSI command action types */ +enum enetc_msg_cmd_action_type { + ENETC_MSG_CMD_MNG_ADD = 1, + ENETC_MSG_CMD_MNG_REMOVE +}; + +/* PSI-VSI command header format */ +struct enetc_msg_cmd_header { + u16 type; /* command class type */ + u16 id; /* denotes the specific required action */ +}; + +/* Common H/W utility functions */ + +static inline void enetc_enable_rxvlan(struct enetc_hw *hw, int si_idx, + bool en) +{ + u32 val = enetc_rxbdr_rd(hw, si_idx, ENETC_RBMR); + + val = (val & ~ENETC_RBMR_VTE) | (en ? ENETC_RBMR_VTE : 0); + enetc_rxbdr_wr(hw, si_idx, ENETC_RBMR, val); +} + +static inline void enetc_enable_txvlan(struct enetc_hw *hw, int si_idx, + bool en) +{ + u32 val = enetc_txbdr_rd(hw, si_idx, ENETC_TBMR); + + val = (val & ~ENETC_TBMR_VIH) | (en ? ENETC_TBMR_VIH : 0); + enetc_txbdr_wr(hw, si_idx, ENETC_TBMR, val); +} diff --git a/drivers/net/ethernet/freescale/enetc/enetc_msg.c b/drivers/net/ethernet/freescale/enetc/enetc_msg.c new file mode 100644 index 000000000000..40d22ebe9224 --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_msg.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* Copyright 2017-2019 NXP */ + +#include "enetc_pf.h" + +static void enetc_msg_disable_mr_int(struct enetc_hw *hw) +{ + u32 psiier = enetc_rd(hw, ENETC_PSIIER); + /* disable MR int source(s) */ + enetc_wr(hw, ENETC_PSIIER, psiier & ~ENETC_PSIIER_MR_MASK); +} + +static void enetc_msg_enable_mr_int(struct enetc_hw *hw) +{ + u32 psiier = enetc_rd(hw, ENETC_PSIIER); + + enetc_wr(hw, ENETC_PSIIER, psiier | ENETC_PSIIER_MR_MASK); +} + +static irqreturn_t enetc_msg_psi_msix(int irq, void *data) +{ + struct enetc_si *si = (struct enetc_si *)data; + struct enetc_pf *pf = enetc_si_priv(si); + + enetc_msg_disable_mr_int(&si->hw); + schedule_work(&pf->msg_task); + + return IRQ_HANDLED; +} + +static void enetc_msg_task(struct work_struct *work) +{ + struct enetc_pf *pf = container_of(work, struct enetc_pf, msg_task); + struct enetc_hw *hw = &pf->si->hw; + unsigned long mr_mask; + int i; + + for (;;) { + mr_mask = enetc_rd(hw, ENETC_PSIMSGRR) & ENETC_PSIMSGRR_MR_MASK; + if (!mr_mask) { + /* re-arm MR interrupts, w1c the IDR reg */ + enetc_wr(hw, ENETC_PSIIDR, ENETC_PSIIER_MR_MASK); + enetc_msg_enable_mr_int(hw); + return; + } + + for (i = 0; i < pf->num_vfs; i++) { + u32 psimsgrr; + u16 msg_code; + + if (!(ENETC_PSIMSGRR_MR(i) & mr_mask)) + continue; + + enetc_msg_handle_rxmsg(pf, i, &msg_code); + + psimsgrr = ENETC_SIMSGSR_SET_MC(msg_code); + psimsgrr |= ENETC_PSIMSGRR_MR(i); /* w1c */ + enetc_wr(hw, ENETC_PSIMSGRR, psimsgrr); + } + } +} + +/* Init */ +static int enetc_msg_alloc_mbx(struct enetc_si *si, int idx) +{ + struct enetc_pf *pf = enetc_si_priv(si); + struct device *dev = &si->pdev->dev; + struct enetc_hw *hw = &si->hw; + struct enetc_msg_swbd *msg; + u32 val; + + msg = &pf->rxmsg[idx]; + /* allocate and set receive buffer */ + msg->size = ENETC_DEFAULT_MSG_SIZE; + + msg->vaddr = dma_alloc_coherent(dev, msg->size, &msg->dma, + GFP_KERNEL); + if (!msg->vaddr) { + dev_err(dev, "msg: fail to alloc dma buffer of size: %d\n", + msg->size); + return -ENOMEM; + } + + /* set multiple of 32 bytes */ + val = lower_32_bits(msg->dma); + enetc_wr(hw, ENETC_PSIVMSGRCVAR0(idx), val); + val = upper_32_bits(msg->dma); + enetc_wr(hw, ENETC_PSIVMSGRCVAR1(idx), val); + + return 0; +} + +static void enetc_msg_free_mbx(struct enetc_si *si, int idx) +{ + struct enetc_pf *pf = enetc_si_priv(si); + struct enetc_hw *hw = &si->hw; + struct enetc_msg_swbd *msg; + + msg = &pf->rxmsg[idx]; + dma_free_coherent(&si->pdev->dev, msg->size, msg->vaddr, msg->dma); + memset(msg, 0, sizeof(*msg)); + + enetc_wr(hw, ENETC_PSIVMSGRCVAR0(idx), 0); + enetc_wr(hw, ENETC_PSIVMSGRCVAR1(idx), 0); +} + +int enetc_msg_psi_init(struct enetc_pf *pf) +{ + struct enetc_si *si = pf->si; + int vector, i, err; + + /* register message passing interrupt handler */ + snprintf(pf->msg_int_name, sizeof(pf->msg_int_name), "%s-vfmsg", + si->ndev->name); + vector = pci_irq_vector(si->pdev, ENETC_SI_INT_IDX); + err = request_irq(vector, enetc_msg_psi_msix, 0, pf->msg_int_name, si); + if (err) { + dev_err(&si->pdev->dev, + "PSI messaging: request_irq() failed!\n"); + return err; + } + + /* set one IRQ entry for PSI message receive notification (SI int) */ + enetc_wr(&si->hw, ENETC_SIMSIVR, ENETC_SI_INT_IDX); + + /* initialize PSI mailbox */ + INIT_WORK(&pf->msg_task, enetc_msg_task); + + for (i = 0; i < pf->num_vfs; i++) { + err = enetc_msg_alloc_mbx(si, i); + if (err) + goto err_init_mbx; + } + + /* enable MR interrupts */ + enetc_msg_enable_mr_int(&si->hw); + + return 0; + +err_init_mbx: + for (i--; i >= 0; i--) + enetc_msg_free_mbx(si, i); + + free_irq(vector, si); + + return err; +} + +void enetc_msg_psi_free(struct enetc_pf *pf) +{ + struct enetc_si *si = pf->si; + int i; + + cancel_work_sync(&pf->msg_task); + + /* disable MR interrupts */ + enetc_msg_disable_mr_int(&si->hw); + + for (i = 0; i < pf->num_vfs; i++) + enetc_msg_free_mbx(si, i); + + /* de-register message passing interrupt handler */ + free_irq(pci_irq_vector(si->pdev, ENETC_SI_INT_IDX), si); +} diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c new file mode 100644 index 000000000000..7d28f5e9b46b --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -0,0 +1,931 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* Copyright 2017-2019 NXP */ + +#include +#include +#include +#include "enetc_pf.h" + +#define ENETC_DRV_VER_MAJ 1 +#define ENETC_DRV_VER_MIN 0 + +#define ENETC_DRV_VER_STR __stringify(ENETC_DRV_VER_MAJ) "." \ + __stringify(ENETC_DRV_VER_MIN) +static const char enetc_drv_ver[] = ENETC_DRV_VER_STR; +#define ENETC_DRV_NAME_STR "ENETC PF driver" +static const char enetc_drv_name[] = ENETC_DRV_NAME_STR; + +static void enetc_pf_get_primary_mac_addr(struct enetc_hw *hw, int si, u8 *addr) +{ + u32 upper = __raw_readl(hw->port + ENETC_PSIPMAR0(si)); + u16 lower = __raw_readw(hw->port + ENETC_PSIPMAR1(si)); + + *(u32 *)addr = upper; + *(u16 *)(addr + 4) = lower; +} + +static void enetc_pf_set_primary_mac_addr(struct enetc_hw *hw, int si, + const u8 *addr) +{ + u32 upper = *(const u32 *)addr; + u16 lower = *(const u16 *)(addr + 4); + + __raw_writel(upper, hw->port + ENETC_PSIPMAR0(si)); + __raw_writew(lower, hw->port + ENETC_PSIPMAR1(si)); +} + +static int enetc_pf_set_mac_addr(struct net_device *ndev, void *addr) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct sockaddr *saddr = addr; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(ndev->dev_addr, saddr->sa_data, ndev->addr_len); + enetc_pf_set_primary_mac_addr(&priv->si->hw, 0, saddr->sa_data); + + return 0; +} + +static void enetc_set_vlan_promisc(struct enetc_hw *hw, char si_map) +{ + u32 val = enetc_port_rd(hw, ENETC_PSIPVMR); + + val &= ~ENETC_PSIPVMR_SET_VP(ENETC_VLAN_PROMISC_MAP_ALL); + enetc_port_wr(hw, ENETC_PSIPVMR, ENETC_PSIPVMR_SET_VP(si_map) | val); +} + +static bool enetc_si_vlan_promisc_is_on(struct enetc_pf *pf, int si_idx) +{ + return pf->vlan_promisc_simap & BIT(si_idx); +} + +static bool enetc_vlan_filter_is_on(struct enetc_pf *pf) +{ + int i; + + for_each_set_bit(i, pf->active_vlans, VLAN_N_VID) + return true; + + return false; +} + +static void enetc_enable_si_vlan_promisc(struct enetc_pf *pf, int si_idx) +{ + pf->vlan_promisc_simap |= BIT(si_idx); + enetc_set_vlan_promisc(&pf->si->hw, pf->vlan_promisc_simap); +} + +static void enetc_disable_si_vlan_promisc(struct enetc_pf *pf, int si_idx) +{ + pf->vlan_promisc_simap &= ~BIT(si_idx); + enetc_set_vlan_promisc(&pf->si->hw, pf->vlan_promisc_simap); +} + +static void enetc_set_isol_vlan(struct enetc_hw *hw, int si, u16 vlan, u8 qos) +{ + u32 val = 0; + + if (vlan) + val = ENETC_PSIVLAN_EN | ENETC_PSIVLAN_SET_QOS(qos) | vlan; + + enetc_port_wr(hw, ENETC_PSIVLANR(si), val); +} + +static int enetc_mac_addr_hash_idx(const u8 *addr) +{ + u64 fold = __swab64(ether_addr_to_u64(addr)) >> 16; + u64 mask = 0; + int res = 0; + int i; + + for (i = 0; i < 8; i++) + mask |= BIT_ULL(i * 6); + + for (i = 0; i < 6; i++) + res |= (hweight64(fold & (mask << i)) & 0x1) << i; + + return res; +} + +static void enetc_reset_mac_addr_filter(struct enetc_mac_filter *filter) +{ + filter->mac_addr_cnt = 0; + + bitmap_zero(filter->mac_hash_table, + ENETC_MADDR_HASH_TBL_SZ); +} + +static void enetc_add_mac_addr_em_filter(struct enetc_mac_filter *filter, + const unsigned char *addr) +{ + /* add exact match addr */ + ether_addr_copy(filter->mac_addr, addr); + filter->mac_addr_cnt++; +} + +static void enetc_add_mac_addr_ht_filter(struct enetc_mac_filter *filter, + const unsigned char *addr) +{ + int idx = enetc_mac_addr_hash_idx(addr); + + /* add hash table entry */ + __set_bit(idx, filter->mac_hash_table); + filter->mac_addr_cnt++; +} + +static void enetc_clear_mac_ht_flt(struct enetc_si *si, int si_idx, int type) +{ + bool err = si->errata & ENETC_ERR_UCMCSWP; + + if (type == UC) { + enetc_port_wr(&si->hw, ENETC_PSIUMHFR0(si_idx, err), 0); + enetc_port_wr(&si->hw, ENETC_PSIUMHFR1(si_idx), 0); + } else { /* MC */ + enetc_port_wr(&si->hw, ENETC_PSIMMHFR0(si_idx, err), 0); + enetc_port_wr(&si->hw, ENETC_PSIMMHFR1(si_idx), 0); + } +} + +static void enetc_set_mac_ht_flt(struct enetc_si *si, int si_idx, int type, + u32 *hash) +{ + bool err = si->errata & ENETC_ERR_UCMCSWP; + + if (type == UC) { + enetc_port_wr(&si->hw, ENETC_PSIUMHFR0(si_idx, err), *hash); + enetc_port_wr(&si->hw, ENETC_PSIUMHFR1(si_idx), *(hash + 1)); + } else { /* MC */ + enetc_port_wr(&si->hw, ENETC_PSIMMHFR0(si_idx, err), *hash); + enetc_port_wr(&si->hw, ENETC_PSIMMHFR1(si_idx), *(hash + 1)); + } +} + +static void enetc_sync_mac_filters(struct enetc_pf *pf) +{ + struct enetc_mac_filter *f = pf->mac_filter; + struct enetc_si *si = pf->si; + int i, pos; + + pos = EMETC_MAC_ADDR_FILT_RES; + + for (i = 0; i < MADDR_TYPE; i++, f++) { + bool em = (f->mac_addr_cnt == 1) && (i == UC); + bool clear = !f->mac_addr_cnt; + + if (clear) { + if (i == UC) + enetc_clear_mac_flt_entry(si, pos); + + enetc_clear_mac_ht_flt(si, 0, i); + continue; + } + + /* exact match filter */ + if (em) { + int err; + + enetc_clear_mac_ht_flt(si, 0, UC); + + err = enetc_set_mac_flt_entry(si, pos, f->mac_addr, + BIT(0)); + if (!err) + continue; + + /* fallback to HT filtering */ + dev_warn(&si->pdev->dev, "fallback to HT filt (%d)\n", + err); + } + + /* hash table filter, clear EM filter for UC entries */ + if (i == UC) + enetc_clear_mac_flt_entry(si, pos); + + enetc_set_mac_ht_flt(si, 0, i, (u32 *)f->mac_hash_table); + } +} + +static void enetc_pf_set_rx_mode(struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_pf *pf = enetc_si_priv(priv->si); + struct enetc_hw *hw = &priv->si->hw; + bool uprom = false, mprom = false; + struct enetc_mac_filter *filter; + struct netdev_hw_addr *ha; + u32 psipmr = 0; + bool em; + + if (ndev->flags & IFF_PROMISC) { + /* enable promisc mode for SI0 (PF) */ + psipmr = ENETC_PSIPMR_SET_UP(0) | ENETC_PSIPMR_SET_MP(0); + uprom = true; + mprom = true; + /* enable VLAN promisc mode for SI0 */ + if (!enetc_si_vlan_promisc_is_on(pf, 0)) + enetc_enable_si_vlan_promisc(pf, 0); + + } else if (ndev->flags & IFF_ALLMULTI) { + /* enable multi cast promisc mode for SI0 (PF) */ + psipmr = ENETC_PSIPMR_SET_MP(0); + mprom = true; + } + + /* first 2 filter entries belong to PF */ + if (!uprom) { + /* Update unicast filters */ + filter = &pf->mac_filter[UC]; + enetc_reset_mac_addr_filter(filter); + + em = (netdev_uc_count(ndev) == 1); + netdev_for_each_uc_addr(ha, ndev) { + if (em) { + enetc_add_mac_addr_em_filter(filter, ha->addr); + break; + } + + enetc_add_mac_addr_ht_filter(filter, ha->addr); + } + } + + if (!mprom) { + /* Update multicast filters */ + filter = &pf->mac_filter[MC]; + enetc_reset_mac_addr_filter(filter); + + netdev_for_each_mc_addr(ha, ndev) { + if (!is_multicast_ether_addr(ha->addr)) + continue; + + enetc_add_mac_addr_ht_filter(filter, ha->addr); + } + } + + if (!uprom || !mprom) + /* update PF entries */ + enetc_sync_mac_filters(pf); + + psipmr |= enetc_port_rd(hw, ENETC_PSIPMR) & + ~(ENETC_PSIPMR_SET_UP(0) | ENETC_PSIPMR_SET_MP(0)); + enetc_port_wr(hw, ENETC_PSIPMR, psipmr); +} + +static void enetc_set_vlan_ht_filter(struct enetc_hw *hw, int si_idx, + u32 *hash) +{ + enetc_port_wr(hw, ENETC_PSIVHFR0(si_idx), *hash); + enetc_port_wr(hw, ENETC_PSIVHFR1(si_idx), *(hash + 1)); +} + +static int enetc_vid_hash_idx(unsigned int vid) +{ + int res = 0; + int i; + + for (i = 0; i < 6; i++) + res |= (hweight8(vid & (BIT(i) | BIT(i + 6))) & 0x1) << i; + + return res; +} + +static void enetc_sync_vlan_ht_filter(struct enetc_pf *pf, bool rehash) +{ + int i; + + if (rehash) { + bitmap_zero(pf->vlan_ht_filter, ENETC_VLAN_HT_SIZE); + + for_each_set_bit(i, pf->active_vlans, VLAN_N_VID) { + int hidx = enetc_vid_hash_idx(i); + + __set_bit(hidx, pf->vlan_ht_filter); + } + } + + enetc_set_vlan_ht_filter(&pf->si->hw, 0, (u32 *)pf->vlan_ht_filter); +} + +static int enetc_vlan_rx_add_vid(struct net_device *ndev, __be16 prot, u16 vid) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_pf *pf = enetc_si_priv(priv->si); + int idx; + + if (enetc_si_vlan_promisc_is_on(pf, 0)) + enetc_disable_si_vlan_promisc(pf, 0); + + __set_bit(vid, pf->active_vlans); + + idx = enetc_vid_hash_idx(vid); + if (!__test_and_set_bit(idx, pf->vlan_ht_filter)) + enetc_sync_vlan_ht_filter(pf, false); + + return 0; +} + +static int enetc_vlan_rx_del_vid(struct net_device *ndev, __be16 prot, u16 vid) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_pf *pf = enetc_si_priv(priv->si); + + __clear_bit(vid, pf->active_vlans); + enetc_sync_vlan_ht_filter(pf, true); + + if (!enetc_vlan_filter_is_on(pf)) + enetc_enable_si_vlan_promisc(pf, 0); + + return 0; +} + +static void enetc_set_loopback(struct net_device *ndev, bool en) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + u32 reg; + + reg = enetc_port_rd(hw, ENETC_PM0_IF_MODE); + if (reg & ENETC_PMO_IFM_RG) { + /* RGMII mode */ + reg = (reg & ~ENETC_PM0_IFM_RLP) | + (en ? ENETC_PM0_IFM_RLP : 0); + enetc_port_wr(hw, ENETC_PM0_IF_MODE, reg); + } else { + /* assume SGMII mode */ + reg = enetc_port_rd(hw, ENETC_PM0_CMD_CFG); + reg = (reg & ~ENETC_PM0_CMD_XGLP) | + (en ? ENETC_PM0_CMD_XGLP : 0); + reg = (reg & ~ENETC_PM0_CMD_PHY_TX_EN) | + (en ? ENETC_PM0_CMD_PHY_TX_EN : 0); + enetc_port_wr(hw, ENETC_PM0_CMD_CFG, reg); + enetc_port_wr(hw, ENETC_PM1_CMD_CFG, reg); + } +} + +static int enetc_pf_set_vf_mac(struct net_device *ndev, int vf, u8 *mac) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_pf *pf = enetc_si_priv(priv->si); + struct enetc_vf_state *vf_state; + + if (vf >= pf->total_vfs) + return -EINVAL; + + if (!is_valid_ether_addr(mac)) + return -EADDRNOTAVAIL; + + vf_state = &pf->vf_state[vf]; + vf_state->flags |= ENETC_VF_FLAG_PF_SET_MAC; + enetc_pf_set_primary_mac_addr(&priv->si->hw, vf + 1, mac); + return 0; +} + +static int enetc_pf_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, + u8 qos, __be16 proto) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_pf *pf = enetc_si_priv(priv->si); + + if (priv->si->errata & ENETC_ERR_VLAN_ISOL) + return -EOPNOTSUPP; + + if (vf >= pf->total_vfs) + return -EINVAL; + + if (proto != htons(ETH_P_8021Q)) + /* only C-tags supported for now */ + return -EPROTONOSUPPORT; + + enetc_set_isol_vlan(&priv->si->hw, vf + 1, vlan, qos); + return 0; +} + +static int enetc_pf_set_vf_spoofchk(struct net_device *ndev, int vf, bool en) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_pf *pf = enetc_si_priv(priv->si); + u32 cfgr; + + if (vf >= pf->total_vfs) + return -EINVAL; + + cfgr = enetc_port_rd(&priv->si->hw, ENETC_PSICFGR0(vf + 1)); + cfgr = (cfgr & ~ENETC_PSICFGR0_ASE) | (en ? ENETC_PSICFGR0_ASE : 0); + enetc_port_wr(&priv->si->hw, ENETC_PSICFGR0(vf + 1), cfgr); + + return 0; +} + +static void enetc_port_setup_primary_mac_address(struct enetc_si *si) +{ + unsigned char mac_addr[MAX_ADDR_LEN]; + struct enetc_pf *pf = enetc_si_priv(si); + struct enetc_hw *hw = &si->hw; + int i; + + /* check MAC addresses for PF and all VFs, if any is 0 set it ro rand */ + for (i = 0; i < pf->total_vfs + 1; i++) { + enetc_pf_get_primary_mac_addr(hw, i, mac_addr); + if (!is_zero_ether_addr(mac_addr)) + continue; + eth_random_addr(mac_addr); + dev_info(&si->pdev->dev, "no MAC address specified for SI%d, using %pM\n", + i, mac_addr); + enetc_pf_set_primary_mac_addr(hw, i, mac_addr); + } +} + +static void enetc_port_assign_rfs_entries(struct enetc_si *si) +{ + struct enetc_pf *pf = enetc_si_priv(si); + struct enetc_hw *hw = &si->hw; + int num_entries, vf_entries, i; + u32 val; + + /* split RFS entries between functions */ + val = enetc_port_rd(hw, ENETC_PRFSCAPR); + num_entries = ENETC_PRFSCAPR_GET_NUM_RFS(val); + vf_entries = num_entries / (pf->total_vfs + 1); + + for (i = 0; i < pf->total_vfs; i++) + enetc_port_wr(hw, ENETC_PSIRFSCFGR(i + 1), vf_entries); + enetc_port_wr(hw, ENETC_PSIRFSCFGR(0), + num_entries - vf_entries * pf->total_vfs); + + /* enable RFS on port */ + enetc_port_wr(hw, ENETC_PRFSMR, ENETC_PRFSMR_RFSE); +} + +static void enetc_port_si_configure(struct enetc_si *si) +{ + struct enetc_pf *pf = enetc_si_priv(si); + struct enetc_hw *hw = &si->hw; + int num_rings, i; + u32 val; + + val = enetc_port_rd(hw, ENETC_PCAPR0); + num_rings = min(ENETC_PCAPR0_RXBDR(val), ENETC_PCAPR0_TXBDR(val)); + + val = ENETC_PSICFGR0_SET_TXBDR(ENETC_PF_NUM_RINGS); + val |= ENETC_PSICFGR0_SET_RXBDR(ENETC_PF_NUM_RINGS); + + if (unlikely(num_rings < ENETC_PF_NUM_RINGS)) { + val = ENETC_PSICFGR0_SET_TXBDR(num_rings); + val |= ENETC_PSICFGR0_SET_RXBDR(num_rings); + + dev_warn(&si->pdev->dev, "Found %d rings, expected %d!\n", + num_rings, ENETC_PF_NUM_RINGS); + + num_rings = 0; + } + + /* Add default one-time settings for SI0 (PF) */ + val |= ENETC_PSICFGR0_SIVC(ENETC_VLAN_TYPE_C | ENETC_VLAN_TYPE_S); + + enetc_port_wr(hw, ENETC_PSICFGR0(0), val); + + if (num_rings) + num_rings -= ENETC_PF_NUM_RINGS; + + /* Configure the SIs for each available VF */ + val = ENETC_PSICFGR0_SIVC(ENETC_VLAN_TYPE_C | ENETC_VLAN_TYPE_S); + val |= ENETC_PSICFGR0_VTE | ENETC_PSICFGR0_SIVIE; + + if (num_rings) { + num_rings /= pf->total_vfs; + val |= ENETC_PSICFGR0_SET_TXBDR(num_rings); + val |= ENETC_PSICFGR0_SET_RXBDR(num_rings); + } + + for (i = 0; i < pf->total_vfs; i++) + enetc_port_wr(hw, ENETC_PSICFGR0(i + 1), val); + + /* Port level VLAN settings */ + val = ENETC_PVCLCTR_OVTPIDL(ENETC_VLAN_TYPE_C | ENETC_VLAN_TYPE_S); + enetc_port_wr(hw, ENETC_PVCLCTR, val); + /* use outer tag for VLAN filtering */ + enetc_port_wr(hw, ENETC_PSIVLANFMR, ENETC_PSIVLANFMR_VS); +} + +static void enetc_configure_port_mac(struct enetc_hw *hw) +{ + enetc_port_wr(hw, ENETC_PM0_MAXFRM, + ENETC_SET_MAXFRM(ENETC_RX_MAXFRM_SIZE)); + + enetc_port_wr(hw, ENETC_PTCMSDUR(0), ENETC_MAC_MAXFRM_SIZE); + enetc_port_wr(hw, ENETC_PTXMBAR, 2 * ENETC_MAC_MAXFRM_SIZE); + + enetc_port_wr(hw, ENETC_PM0_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN | + ENETC_PM0_CMD_TXP | ENETC_PM0_PROMISC | + ENETC_PM0_TX_EN | ENETC_PM0_RX_EN); + + enetc_port_wr(hw, ENETC_PM1_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN | + ENETC_PM0_CMD_TXP | ENETC_PM0_PROMISC | + ENETC_PM0_TX_EN | ENETC_PM0_RX_EN); + /* set auto-speed for RGMII */ + if (enetc_port_rd(hw, ENETC_PM0_IF_MODE) & ENETC_PMO_IFM_RG) + enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_RGAUTO); + if (enetc_global_rd(hw, ENETC_G_EPFBLPR(1)) == ENETC_G_EPFBLPR1_XGMII) + enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_XGMII); +} + +static void enetc_configure_port_pmac(struct enetc_hw *hw) +{ + u32 temp; + + /* Set pMAC step lock */ + temp = enetc_port_rd(hw, ENETC_PFPMR); + enetc_port_wr(hw, ENETC_PFPMR, + temp | ENETC_PFPMR_PMACE | ENETC_PFPMR_MWLM); + + temp = enetc_port_rd(hw, ENETC_MMCSR); + enetc_port_wr(hw, ENETC_MMCSR, temp | ENETC_MMCSR_ME); +} + +static void enetc_configure_port(struct enetc_pf *pf) +{ + u8 hash_key[ENETC_RSSHASH_KEY_SIZE]; + struct enetc_hw *hw = &pf->si->hw; + + enetc_configure_port_pmac(hw); + + enetc_configure_port_mac(hw); + + enetc_port_si_configure(pf->si); + + /* set up hash key */ + get_random_bytes(hash_key, ENETC_RSSHASH_KEY_SIZE); + enetc_set_rss_key(hw, hash_key); + + /* split up RFS entries */ + enetc_port_assign_rfs_entries(pf->si); + + /* fix-up primary MAC addresses, if not set already */ + enetc_port_setup_primary_mac_address(pf->si); + + /* enforce VLAN promisc mode for all SIs */ + pf->vlan_promisc_simap = ENETC_VLAN_PROMISC_MAP_ALL; + enetc_set_vlan_promisc(hw, pf->vlan_promisc_simap); + + enetc_port_wr(hw, ENETC_PSIPMR, 0); + + /* enable port */ + enetc_port_wr(hw, ENETC_PMR, ENETC_PMR_EN); +} + +/* Messaging */ +static u16 enetc_msg_pf_set_vf_primary_mac_addr(struct enetc_pf *pf, + int vf_id) +{ + struct enetc_vf_state *vf_state = &pf->vf_state[vf_id]; + struct enetc_msg_swbd *msg = &pf->rxmsg[vf_id]; + struct enetc_msg_cmd_set_primary_mac *cmd; + struct device *dev = &pf->si->pdev->dev; + u16 cmd_id; + char *addr; + + cmd = (struct enetc_msg_cmd_set_primary_mac *)msg->vaddr; + cmd_id = cmd->header.id; + if (cmd_id != ENETC_MSG_CMD_MNG_ADD) + return ENETC_MSG_CMD_STATUS_FAIL; + + addr = cmd->mac.sa_data; + if (vf_state->flags & ENETC_VF_FLAG_PF_SET_MAC) + dev_warn(dev, "Attempt to override PF set mac addr for VF%d\n", + vf_id); + else + enetc_pf_set_primary_mac_addr(&pf->si->hw, vf_id + 1, addr); + + return ENETC_MSG_CMD_STATUS_OK; +} + +void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int vf_id, u16 *status) +{ + struct enetc_msg_swbd *msg = &pf->rxmsg[vf_id]; + struct device *dev = &pf->si->pdev->dev; + struct enetc_msg_cmd_header *cmd_hdr; + u16 cmd_type; + + *status = ENETC_MSG_CMD_STATUS_OK; + cmd_hdr = (struct enetc_msg_cmd_header *)msg->vaddr; + cmd_type = cmd_hdr->type; + + switch (cmd_type) { + case ENETC_MSG_CMD_MNG_MAC: + *status = enetc_msg_pf_set_vf_primary_mac_addr(pf, vf_id); + break; + default: + dev_err(dev, "command not supported (cmd_type: 0x%x)\n", + cmd_type); + } +} + +#ifdef CONFIG_PCI_IOV +static int enetc_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct enetc_si *si = pci_get_drvdata(pdev); + struct enetc_pf *pf = enetc_si_priv(si); + int err; + + if (!num_vfs) { + enetc_msg_psi_free(pf); + kfree(pf->vf_state); + pf->num_vfs = 0; + pci_disable_sriov(pdev); + } else { + pf->num_vfs = num_vfs; + + pf->vf_state = kcalloc(num_vfs, sizeof(struct enetc_vf_state), + GFP_KERNEL); + if (!pf->vf_state) { + pf->num_vfs = 0; + return -ENOMEM; + } + + err = enetc_msg_psi_init(pf); + if (err) { + dev_err(&pdev->dev, "enetc_msg_psi_init (%d)\n", err); + goto err_msg_psi; + } + + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + dev_err(&pdev->dev, "pci_enable_sriov err %d\n", err); + goto err_en_sriov; + } + } + + return num_vfs; + +err_en_sriov: + enetc_msg_psi_free(pf); +err_msg_psi: + kfree(pf->vf_state); + pf->num_vfs = 0; + + return err; +} +#else +#define enetc_sriov_configure(pdev, num_vfs) (void)0 +#endif + +static int enetc_pf_set_features(struct net_device *ndev, + netdev_features_t features) +{ + netdev_features_t changed = ndev->features ^ features; + struct enetc_ndev_priv *priv = netdev_priv(ndev); + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) + enetc_enable_rxvlan(&priv->si->hw, 0, + !!(features & NETIF_F_HW_VLAN_CTAG_RX)); + + if (changed & NETIF_F_HW_VLAN_CTAG_TX) + enetc_enable_txvlan(&priv->si->hw, 0, + !!(features & NETIF_F_HW_VLAN_CTAG_TX)); + + if (changed & NETIF_F_LOOPBACK) + enetc_set_loopback(ndev, !!(features & NETIF_F_LOOPBACK)); + + return enetc_set_features(ndev, features); +} + +static const struct net_device_ops enetc_ndev_ops = { + .ndo_open = enetc_open, + .ndo_stop = enetc_close, + .ndo_start_xmit = enetc_xmit, + .ndo_get_stats = enetc_get_stats, + .ndo_set_mac_address = enetc_pf_set_mac_addr, + .ndo_set_rx_mode = enetc_pf_set_rx_mode, + .ndo_vlan_rx_add_vid = enetc_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = enetc_vlan_rx_del_vid, + .ndo_set_vf_mac = enetc_pf_set_vf_mac, + .ndo_set_vf_vlan = enetc_pf_set_vf_vlan, + .ndo_set_vf_spoofchk = enetc_pf_set_vf_spoofchk, + .ndo_set_features = enetc_pf_set_features, +}; + +static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, + const struct net_device_ops *ndev_ops) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + + SET_NETDEV_DEV(ndev, &si->pdev->dev); + priv->ndev = ndev; + priv->si = si; + priv->dev = &si->pdev->dev; + si->ndev = ndev; + + priv->msg_enable = (NETIF_MSG_WOL << 1) - 1; + ndev->netdev_ops = ndev_ops; + enetc_set_ethtool_ops(ndev); + ndev->watchdog_timeo = 5 * HZ; + ndev->max_mtu = ENETC_MAX_MTU; + + ndev->hw_features = NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_LOOPBACK; + ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | + NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER; + + if (si->num_rss) + ndev->hw_features |= NETIF_F_RXHASH; + + if (si->errata & ENETC_ERR_TXCSUM) { + ndev->hw_features &= ~NETIF_F_HW_CSUM; + ndev->features &= ~NETIF_F_HW_CSUM; + } + + ndev->priv_flags |= IFF_UNICAST_FLT; + + /* pick up primary MAC address from SI */ + enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr); +} + +static int enetc_of_get_phy(struct enetc_ndev_priv *priv) +{ + struct device_node *np = priv->dev->of_node; + int err; + + if (!np) { + dev_err(priv->dev, "missing ENETC port node\n"); + return -ENODEV; + } + + priv->phy_node = of_parse_phandle(np, "phy-handle", 0); + if (!priv->phy_node) { + if (!of_phy_is_fixed_link(np)) { + dev_err(priv->dev, "PHY not specified\n"); + return -ENODEV; + } + + err = of_phy_register_fixed_link(np); + if (err < 0) { + dev_err(priv->dev, "fixed link registration failed\n"); + return err; + } + + priv->phy_node = of_node_get(np); + } + + priv->if_mode = of_get_phy_mode(np); + if (priv->if_mode < 0) { + dev_err(priv->dev, "missing phy type\n"); + of_node_put(priv->phy_node); + if (of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); + + return -EINVAL; + } + + return 0; +} + +static void enetc_of_put_phy(struct enetc_ndev_priv *priv) +{ + struct device_node *np = priv->dev->of_node; + + if (np && of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); + if (priv->phy_node) + of_node_put(priv->phy_node); +} + +static int enetc_pf_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct enetc_ndev_priv *priv; + struct net_device *ndev; + struct enetc_si *si; + struct enetc_pf *pf; + int err; + + if (pdev->dev.of_node && !of_device_is_available(pdev->dev.of_node)) { + dev_info(&pdev->dev, "device is disabled, skipping\n"); + return -ENODEV; + } + + err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf)); + if (err) { + dev_err(&pdev->dev, "PCI probing failed\n"); + return err; + } + + si = pci_get_drvdata(pdev); + if (!si->hw.port || !si->hw.global) { + err = -ENODEV; + dev_err(&pdev->dev, "could not map PF space, probing a VF?\n"); + goto err_map_pf_space; + } + + pf = enetc_si_priv(si); + pf->si = si; + pf->total_vfs = pci_sriov_get_totalvfs(pdev); + + enetc_configure_port(pf); + + enetc_get_si_caps(si); + + ndev = alloc_etherdev_mq(sizeof(*priv), ENETC_MAX_NUM_TXQS); + if (!ndev) { + err = -ENOMEM; + dev_err(&pdev->dev, "netdev creation failed\n"); + goto err_alloc_netdev; + } + + enetc_pf_netdev_setup(si, ndev, &enetc_ndev_ops); + + priv = netdev_priv(ndev); + + enetc_init_si_rings_params(priv); + + err = enetc_alloc_si_resources(priv); + if (err) { + dev_err(&pdev->dev, "SI resource alloc failed\n"); + goto err_alloc_si_res; + } + + err = enetc_alloc_msix(priv); + if (err) { + dev_err(&pdev->dev, "MSIX alloc failed\n"); + goto err_alloc_msix; + } + + err = enetc_of_get_phy(priv); + if (err) + dev_warn(&pdev->dev, "Fallback to PHY-less operation\n"); + + err = register_netdev(ndev); + if (err) + goto err_reg_netdev; + + netif_carrier_off(ndev); + + netif_info(priv, probe, ndev, "%s v%s\n", + enetc_drv_name, enetc_drv_ver); + + return 0; + +err_reg_netdev: + enetc_of_put_phy(priv); + enetc_free_msix(priv); +err_alloc_msix: + enetc_free_si_resources(priv); +err_alloc_si_res: + si->ndev = NULL; + free_netdev(ndev); +err_alloc_netdev: +err_map_pf_space: + enetc_pci_remove(pdev); + + return err; +} + +static void enetc_pf_remove(struct pci_dev *pdev) +{ + struct enetc_si *si = pci_get_drvdata(pdev); + struct enetc_pf *pf = enetc_si_priv(si); + struct enetc_ndev_priv *priv; + + if (pf->num_vfs) + enetc_sriov_configure(pdev, 0); + + priv = netdev_priv(si->ndev); + netif_info(priv, drv, si->ndev, "%s v%s remove\n", + enetc_drv_name, enetc_drv_ver); + + unregister_netdev(si->ndev); + + enetc_of_put_phy(priv); + + enetc_free_msix(priv); + + enetc_free_si_resources(priv); + + free_netdev(si->ndev); + + enetc_pci_remove(pdev); +} + +static const struct pci_device_id enetc_pf_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF) }, + { 0, } /* End of table. */ +}; +MODULE_DEVICE_TABLE(pci, enetc_pf_id_table); + +static struct pci_driver enetc_pf_driver = { + .name = KBUILD_MODNAME, + .id_table = enetc_pf_id_table, + .probe = enetc_pf_probe, + .remove = enetc_pf_remove, +#ifdef CONFIG_PCI_IOV + .sriov_configure = enetc_sriov_configure, +#endif +}; +module_pci_driver(enetc_pf_driver); + +MODULE_DESCRIPTION(ENETC_DRV_NAME_STR); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(ENETC_DRV_VER_STR); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.h b/drivers/net/ethernet/freescale/enetc/enetc_pf.h new file mode 100644 index 000000000000..2061ae5ccba0 --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2017-2019 NXP */ + +#include "enetc.h" + +#define ENETC_PF_NUM_RINGS 8 + +enum enetc_mac_addr_type {UC, MC, MADDR_TYPE}; +#define ENETC_MAX_NUM_MAC_FLT ((ENETC_MAX_NUM_VFS + 1) * MADDR_TYPE) + +#define ENETC_MADDR_HASH_TBL_SZ 64 +struct enetc_mac_filter { + union { + char mac_addr[ETH_ALEN]; + DECLARE_BITMAP(mac_hash_table, ENETC_MADDR_HASH_TBL_SZ); + }; + int mac_addr_cnt; +}; + +#define ENETC_VLAN_HT_SIZE 64 + +enum enetc_vf_flags { + ENETC_VF_FLAG_PF_SET_MAC = BIT(0), +}; + +struct enetc_vf_state { + enum enetc_vf_flags flags; +}; + +struct enetc_pf { + struct enetc_si *si; + int num_vfs; /* number of active VFs, after sriov_init */ + int total_vfs; /* max number of VFs, set for PF at probe */ + struct enetc_vf_state *vf_state; + + struct enetc_mac_filter mac_filter[ENETC_MAX_NUM_MAC_FLT]; + + struct enetc_msg_swbd rxmsg[ENETC_MAX_NUM_VFS]; + struct work_struct msg_task; + char msg_int_name[ENETC_INT_NAME_MAX]; + + char vlan_promisc_simap; /* bitmap of SIs in VLAN promisc mode */ + DECLARE_BITMAP(vlan_ht_filter, ENETC_VLAN_HT_SIZE); + DECLARE_BITMAP(active_vlans, VLAN_N_VID); +}; + +int enetc_msg_psi_init(struct enetc_pf *pf); +void enetc_msg_psi_free(struct enetc_pf *pf); +void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int mbox_id, u16 *status); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c new file mode 100644 index 000000000000..64bebee9f52a --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* Copyright 2017-2019 NXP */ + +#include +#include "enetc.h" + +#define ENETC_DRV_VER_MAJ 1 +#define ENETC_DRV_VER_MIN 0 + +#define ENETC_DRV_VER_STR __stringify(ENETC_DRV_VER_MAJ) "." \ + __stringify(ENETC_DRV_VER_MIN) +static const char enetc_drv_ver[] = ENETC_DRV_VER_STR; +#define ENETC_DRV_NAME_STR "ENETC VF driver" +static const char enetc_drv_name[] = ENETC_DRV_NAME_STR; + +/* Messaging */ +static void enetc_msg_vsi_write_msg(struct enetc_hw *hw, + struct enetc_msg_swbd *msg) +{ + u32 val; + + val = enetc_vsi_set_msize(msg->size) | lower_32_bits(msg->dma); + enetc_wr(hw, ENETC_VSIMSGSNDAR1, upper_32_bits(msg->dma)); + enetc_wr(hw, ENETC_VSIMSGSNDAR0, val); +} + +static int enetc_msg_vsi_send(struct enetc_si *si, struct enetc_msg_swbd *msg) +{ + int timeout = 100; + u32 vsimsgsr; + + enetc_msg_vsi_write_msg(&si->hw, msg); + + do { + vsimsgsr = enetc_rd(&si->hw, ENETC_VSIMSGSR); + if (!(vsimsgsr & ENETC_VSIMSGSR_MB)) + break; + + usleep_range(1000, 2000); + } while (--timeout); + + if (!timeout) + return -ETIMEDOUT; + + /* check for message delivery error */ + if (vsimsgsr & ENETC_VSIMSGSR_MS) { + dev_err(&si->pdev->dev, "VSI command execute error: %d\n", + ENETC_SIMSGSR_GET_MC(vsimsgsr)); + return -EIO; + } + + return 0; +} + +static int enetc_msg_vsi_set_primary_mac_addr(struct enetc_ndev_priv *priv, + struct sockaddr *saddr) +{ + struct enetc_msg_cmd_set_primary_mac *cmd; + struct enetc_msg_swbd msg; + int err; + + msg.size = ALIGN(sizeof(struct enetc_msg_cmd_set_primary_mac), 64); + msg.vaddr = dma_alloc_coherent(priv->dev, msg.size, &msg.dma, + GFP_KERNEL); + if (!msg.vaddr) { + dev_err(priv->dev, "Failed to alloc Tx msg (size: %d)\n", + msg.size); + return -ENOMEM; + } + + cmd = (struct enetc_msg_cmd_set_primary_mac *)msg.vaddr; + cmd->header.type = ENETC_MSG_CMD_MNG_MAC; + cmd->header.id = ENETC_MSG_CMD_MNG_ADD; + memcpy(&cmd->mac, saddr, sizeof(struct sockaddr)); + + /* send the command and wait */ + err = enetc_msg_vsi_send(priv->si, &msg); + + dma_free_coherent(priv->dev, msg.size, msg.vaddr, msg.dma); + + return err; +} + +static int enetc_vf_set_mac_addr(struct net_device *ndev, void *addr) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct sockaddr *saddr = addr; + int err; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + err = enetc_msg_vsi_set_primary_mac_addr(priv, saddr); + if (err) + return err; + + return 0; +} + +static int enetc_vf_set_features(struct net_device *ndev, + netdev_features_t features) +{ + return enetc_set_features(ndev, features); +} + +/* Probing/ Init */ +static const struct net_device_ops enetc_ndev_ops = { + .ndo_open = enetc_open, + .ndo_stop = enetc_close, + .ndo_start_xmit = enetc_xmit, + .ndo_get_stats = enetc_get_stats, + .ndo_set_mac_address = enetc_vf_set_mac_addr, + .ndo_set_features = enetc_vf_set_features, +}; + +static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev, + const struct net_device_ops *ndev_ops) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + + SET_NETDEV_DEV(ndev, &si->pdev->dev); + priv->ndev = ndev; + priv->si = si; + priv->dev = &si->pdev->dev; + si->ndev = ndev; + + priv->msg_enable = (NETIF_MSG_IFUP << 1) - 1; + ndev->netdev_ops = ndev_ops; + enetc_set_ethtool_ops(ndev); + ndev->watchdog_timeo = 5 * HZ; + ndev->max_mtu = ENETC_MAX_MTU; + + ndev->hw_features = NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | + NETIF_F_RXCSUM | NETIF_F_HW_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + + if (si->num_rss) + ndev->hw_features |= NETIF_F_RXHASH; + + if (si->errata & ENETC_ERR_TXCSUM) { + ndev->hw_features &= ~NETIF_F_HW_CSUM; + ndev->features &= ~NETIF_F_HW_CSUM; + } + + /* pick up primary MAC address from SI */ + enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr); +} + +static int enetc_vf_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct enetc_ndev_priv *priv; + struct net_device *ndev; + struct enetc_si *si; + int err; + + err = enetc_pci_probe(pdev, KBUILD_MODNAME, 0); + if (err) { + dev_err(&pdev->dev, "PCI probing failed\n"); + return err; + } + + si = pci_get_drvdata(pdev); + + enetc_get_si_caps(si); + + ndev = alloc_etherdev_mq(sizeof(*priv), ENETC_MAX_NUM_TXQS); + if (!ndev) { + err = -ENOMEM; + dev_err(&pdev->dev, "netdev creation failed\n"); + goto err_alloc_netdev; + } + + enetc_vf_netdev_setup(si, ndev, &enetc_ndev_ops); + + priv = netdev_priv(ndev); + + enetc_init_si_rings_params(priv); + + err = enetc_alloc_si_resources(priv); + if (err) { + dev_err(&pdev->dev, "SI resource alloc failed\n"); + goto err_alloc_si_res; + } + + err = enetc_alloc_msix(priv); + if (err) { + dev_err(&pdev->dev, "MSIX alloc failed\n"); + goto err_alloc_msix; + } + + err = register_netdev(ndev); + if (err) + goto err_reg_netdev; + + netif_carrier_off(ndev); + + netif_info(priv, probe, ndev, "%s v%s\n", + enetc_drv_name, enetc_drv_ver); + + return 0; + +err_reg_netdev: + enetc_free_msix(priv); +err_alloc_msix: + enetc_free_si_resources(priv); +err_alloc_si_res: + si->ndev = NULL; + free_netdev(ndev); +err_alloc_netdev: + enetc_pci_remove(pdev); + + return err; +} + +static void enetc_vf_remove(struct pci_dev *pdev) +{ + struct enetc_si *si = pci_get_drvdata(pdev); + struct enetc_ndev_priv *priv; + + priv = netdev_priv(si->ndev); + netif_info(priv, drv, si->ndev, "%s v%s remove\n", + enetc_drv_name, enetc_drv_ver); + unregister_netdev(si->ndev); + + enetc_free_msix(priv); + + enetc_free_si_resources(priv); + + free_netdev(si->ndev); + + enetc_pci_remove(pdev); +} + +static const struct pci_device_id enetc_vf_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_VF) }, + { 0, } /* End of table. */ +}; +MODULE_DEVICE_TABLE(pci, enetc_vf_id_table); + +static struct pci_driver enetc_vf_driver = { + .name = KBUILD_MODNAME, + .id_table = enetc_vf_id_table, + .probe = enetc_vf_probe, + .remove = enetc_vf_remove, +}; +module_pci_driver(enetc_vf_driver); + +MODULE_DESCRIPTION(ENETC_DRV_NAME_STR); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(ENETC_DRV_VER_STR); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index ae0f88bce9aa..2370dc204202 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3467,7 +3467,7 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_clk_ipg; - fep->reg_phy = devm_regulator_get(&pdev->dev, "phy"); + fep->reg_phy = devm_regulator_get_optional(&pdev->dev, "phy"); if (!IS_ERR(fep->reg_phy)) { ret = regulator_enable(fep->reg_phy); if (ret) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 5b33238c6680..60e7d7ae3787 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -2418,6 +2418,8 @@ static int hns_nic_dev_probe(struct platform_device *pdev) out_notify_fail: (void)cancel_work_sync(&priv->service_task); out_read_prop_fail: + /* safe for ACPI FW */ + of_node_put(to_of_node(priv->fwnode)); free_netdev(ndev); return ret; } @@ -2447,6 +2449,9 @@ static int hns_nic_dev_remove(struct platform_device *pdev) set_bit(NIC_STATE_REMOVING, &priv->state); (void)cancel_work_sync(&priv->service_task); + /* safe for ACPI FW */ + of_node_put(to_of_node(priv->fwnode)); + free_netdev(ndev); return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 8e9b95871d30..ce15d2350db9 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -1157,16 +1157,18 @@ static int hns_get_regs_len(struct net_device *net_dev) */ static int hns_nic_nway_reset(struct net_device *netdev) { - int ret = 0; struct phy_device *phy = netdev->phydev; - if (netif_running(netdev)) { - /* if autoneg is disabled, don't restart auto-negotiation */ - if (phy && phy->autoneg == AUTONEG_ENABLE) - ret = genphy_restart_aneg(phy); - } + if (!netif_running(netdev)) + return 0; - return ret; + if (!phy) + return -EOPNOTSUPP; + + if (phy->autoneg != AUTONEG_ENABLE) + return -EINVAL; + + return genphy_restart_aneg(phy); } static u32 diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index 781e5dee3c70..50011aafbae4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -238,7 +238,7 @@ EXPORT_SYMBOL(hnae3_unregister_ae_algo); * @ae_dev: the AE device * NOTE: the duplicated name will not be checked */ -void hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev) +int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev) { const struct pci_device_id *id; struct hnae3_ae_algo *ae_algo; @@ -259,6 +259,7 @@ void hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev) if (!ae_dev->ops) { dev_err(&ae_dev->pdev->dev, "ae_dev ops are null\n"); + ret = -EOPNOTSUPP; goto out_err; } @@ -285,8 +286,15 @@ void hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev) ret); } -out_err: mutex_unlock(&hnae3_common_lock); + + return 0; + +out_err: + list_del(&ae_dev->node); + mutex_unlock(&hnae3_common_lock); + + return ret; } EXPORT_SYMBOL(hnae3_register_ae_dev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index d486748d5883..e05b4926feb2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -433,7 +433,8 @@ struct hnae3_ae_ops { struct ethtool_channels *ch); void (*get_tqps_and_rss_info)(struct hnae3_handle *h, u16 *alloc_tqps, u16 *max_rss_size); - int (*set_channels)(struct hnae3_handle *handle, u32 new_tqps_num); + int (*set_channels)(struct hnae3_handle *handle, u32 new_tqps_num, + bool rxfh_configured); void (*get_flowctrl_adv)(struct hnae3_handle *handle, u32 *flowctrl_adv); int (*set_led_id)(struct hnae3_handle *handle, @@ -463,6 +464,8 @@ struct hnae3_ae_ops { int (*set_gro_en)(struct hnae3_handle *handle, int enable); u16 (*get_global_queue_id)(struct hnae3_handle *handle, u16 queue_id); void (*set_timer_task)(struct hnae3_handle *handle, bool enable); + int (*mac_connect_phy)(struct hnae3_handle *handle); + void (*mac_disconnect_phy)(struct hnae3_handle *handle); }; struct hnae3_dcb_ops { @@ -476,7 +479,6 @@ struct hnae3_dcb_ops { u8 (*getdcbx)(struct hnae3_handle *); u8 (*setdcbx)(struct hnae3_handle *, u8); - int (*map_update)(struct hnae3_handle *); int (*setup_tc)(struct hnae3_handle *, u8, u8 *); }; @@ -587,7 +589,7 @@ struct hnae3_handle { #define hnae3_get_bit(origin, shift) \ hnae3_get_field((origin), (0x1 << (shift)), (shift)) -void hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev); +int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev); void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev); void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 9dd8949381bc..c546b874d659 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -348,6 +348,8 @@ static int hns3_nic_net_up(struct net_device *netdev) return ret; } + clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); + /* enable the vectors */ for (i = 0; i < priv->vector_num; i++) hns3_vector_enable(&priv->tqp_vector[i]); @@ -361,11 +363,10 @@ static int hns3_nic_net_up(struct net_device *netdev) if (ret) goto out_start_err; - clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); - return 0; out_start_err: + set_bit(HNS3_NIC_STATE_DOWN, &priv->state); while (j--) hns3_tqp_disable(h->kinfo.tqp[j]); @@ -506,7 +507,7 @@ static u8 hns3_get_netdev_flags(struct net_device *netdev) u8 flags = 0; if (netdev->flags & IFF_PROMISC) { - flags = HNAE3_USER_UPE | HNAE3_USER_MPE; + flags = HNAE3_USER_UPE | HNAE3_USER_MPE | HNAE3_BPE; } else { flags |= HNAE3_VLAN_FLTR; if (netdev->flags & IFF_ALLMULTI) @@ -541,13 +542,13 @@ static void hns3_nic_set_rx_mode(struct net_device *netdev) } } - hns3_update_promisc_mode(netdev, new_flags); /* User mode Promisc mode enable and vlan filtering is disabled to * let all packets in. MAC-VLAN Table overflow Promisc enabled and * vlan fitering is enabled */ hns3_enable_vlan_filter(netdev, new_flags & HNAE3_VLAN_FLTR); h->netdev_flags = new_flags; + hns3_update_promisc_mode(netdev, new_flags); } int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags) @@ -654,11 +655,7 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen, static int hns3_get_l4_protocol(struct sk_buff *skb, u8 *ol4_proto, u8 *il4_proto) { - union { - struct iphdr *v4; - struct ipv6hdr *v6; - unsigned char *hdr; - } l3; + union l3_hdr_info l3; unsigned char *l4_hdr; unsigned char *exthdr; u8 l4_proto_tmp; @@ -711,17 +708,8 @@ static void hns3_set_l2l3l4_len(struct sk_buff *skb, u8 ol4_proto, u8 il4_proto, u32 *type_cs_vlan_tso, u32 *ol_type_vlan_len_msec) { - union { - struct iphdr *v4; - struct ipv6hdr *v6; - unsigned char *hdr; - } l3; - union { - struct tcphdr *tcp; - struct udphdr *udp; - struct gre_base_hdr *gre; - unsigned char *hdr; - } l4; + union l3_hdr_info l3; + union l4_hdr_info l4; unsigned char *l2_hdr; u8 l4_proto = ol4_proto; u32 ol2_len; @@ -820,12 +808,7 @@ static void hns3_set_l2l3l4_len(struct sk_buff *skb, u8 ol4_proto, static bool hns3_tunnel_csum_bug(struct sk_buff *skb) { #define IANA_VXLAN_PORT 4789 - union { - struct tcphdr *tcp; - struct udphdr *udp; - struct gre_base_hdr *gre; - unsigned char *hdr; - } l4; + union l4_hdr_info l4; l4.hdr = skb_transport_header(skb); @@ -841,11 +824,7 @@ static int hns3_set_l3l4_type_csum(struct sk_buff *skb, u8 ol4_proto, u8 il4_proto, u32 *type_cs_vlan_tso, u32 *ol_type_vlan_len_msec) { - union { - struct iphdr *v4; - struct ipv6hdr *v6; - unsigned char *hdr; - } l3; + union l3_hdr_info l3; u32 l4_proto = ol4_proto; l3.hdr = skb_network_header(skb); @@ -1133,6 +1112,7 @@ static int hns3_nic_maybe_stop_tso(struct sk_buff **out_skb, int *bnum, struct hns3_enet_ring *ring) { struct sk_buff *skb = *out_skb; + struct sk_buff *new_skb = NULL; struct skb_frag_struct *frag; int bdnum_for_frag; int frag_num; @@ -1155,7 +1135,19 @@ static int hns3_nic_maybe_stop_tso(struct sk_buff **out_skb, int *bnum, buf_num += bdnum_for_frag; } - if (buf_num > ring_space(ring)) + if (unlikely(buf_num > HNS3_MAX_BD_PER_FRAG)) { + buf_num = (skb->len + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE; + if (ring_space(ring) < buf_num) + return -EBUSY; + /* manual split the send packet */ + new_skb = skb_copy(skb, GFP_ATOMIC); + if (!new_skb) + return -ENOMEM; + dev_kfree_skb_any(skb); + *out_skb = new_skb; + } + + if (unlikely(ring_space(ring) < buf_num)) return -EBUSY; *bnum = buf_num; @@ -1166,11 +1158,24 @@ static int hns3_nic_maybe_stop_tx(struct sk_buff **out_skb, int *bnum, struct hns3_enet_ring *ring) { struct sk_buff *skb = *out_skb; + struct sk_buff *new_skb = NULL; int buf_num; /* No. of segments (plus a header) */ buf_num = skb_shinfo(skb)->nr_frags + 1; + if (unlikely(buf_num > HNS3_MAX_BD_PER_FRAG)) { + buf_num = (skb->len + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE; + if (ring_space(ring) < buf_num) + return -EBUSY; + /* manual split the send packet */ + new_skb = skb_copy(skb, GFP_ATOMIC); + if (!new_skb) + return -ENOMEM; + dev_kfree_skb_any(skb); + *out_skb = new_skb; + } + if (unlikely(ring_space(ring) < buf_num)) return -EBUSY; @@ -1399,7 +1404,12 @@ static void hns3_nic_get_stats64(struct net_device *netdev, int queue_num = priv->ae_handle->kinfo.num_tqps; struct hnae3_handle *handle = priv->ae_handle; struct hns3_enet_ring *ring; + u64 rx_length_errors = 0; + u64 rx_crc_errors = 0; + u64 rx_multicast = 0; unsigned int start; + u64 tx_errors = 0; + u64 rx_errors = 0; unsigned int idx; u64 tx_bytes = 0; u64 rx_bytes = 0; @@ -1420,8 +1430,8 @@ static void hns3_nic_get_stats64(struct net_device *netdev, start = u64_stats_fetch_begin_irq(&ring->syncp); tx_bytes += ring->stats.tx_bytes; tx_pkts += ring->stats.tx_pkts; - tx_drop += ring->stats.tx_busy; tx_drop += ring->stats.sw_err_cnt; + tx_errors += ring->stats.sw_err_cnt; } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); /* fetch the rx stats */ @@ -1431,8 +1441,13 @@ static void hns3_nic_get_stats64(struct net_device *netdev, rx_bytes += ring->stats.rx_bytes; rx_pkts += ring->stats.rx_pkts; rx_drop += ring->stats.non_vld_descs; - rx_drop += ring->stats.err_pkt_len; rx_drop += ring->stats.l2_err; + rx_errors += ring->stats.non_vld_descs; + rx_errors += ring->stats.l2_err; + rx_crc_errors += ring->stats.l2_err; + rx_crc_errors += ring->stats.l3l4_csum_err; + rx_multicast += ring->stats.rx_multicast; + rx_length_errors += ring->stats.err_pkt_len; } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); } @@ -1441,15 +1456,15 @@ static void hns3_nic_get_stats64(struct net_device *netdev, stats->rx_bytes = rx_bytes; stats->rx_packets = rx_pkts; - stats->rx_errors = netdev->stats.rx_errors; - stats->multicast = netdev->stats.multicast; - stats->rx_length_errors = netdev->stats.rx_length_errors; - stats->rx_crc_errors = netdev->stats.rx_crc_errors; + stats->rx_errors = rx_errors; + stats->multicast = rx_multicast; + stats->rx_length_errors = rx_length_errors; + stats->rx_crc_errors = rx_crc_errors; stats->rx_missed_errors = netdev->stats.rx_missed_errors; - stats->tx_errors = netdev->stats.tx_errors; - stats->rx_dropped = rx_drop + netdev->stats.rx_dropped; - stats->tx_dropped = tx_drop + netdev->stats.tx_dropped; + stats->tx_errors = tx_errors; + stats->rx_dropped = rx_drop; + stats->tx_dropped = tx_drop; stats->collisions = netdev->stats.collisions; stats->rx_over_errors = netdev->stats.rx_over_errors; stats->rx_frame_errors = netdev->stats.rx_frame_errors; @@ -1472,8 +1487,6 @@ static int hns3_setup_tc(struct net_device *netdev, void *type_data) u8 tc = mqprio_qopt->qopt.num_tc; u16 mode = mqprio_qopt->mode; u8 hw = mqprio_qopt->qopt.hw; - bool if_running; - int ret; if (!((hw == TC_MQPRIO_HW_OFFLOAD_TCS && mode == TC_MQPRIO_MODE_CHANNEL) || (!hw && tc == 0))) @@ -1485,24 +1498,8 @@ static int hns3_setup_tc(struct net_device *netdev, void *type_data) if (!netdev) return -EINVAL; - if_running = netif_running(netdev); - if (if_running) { - hns3_nic_net_stop(netdev); - msleep(100); - } - - ret = (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ? + return (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ? kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP; - if (ret) - goto out; - - ret = hns3_nic_set_real_num_queue(netdev); - -out: - if (if_running) - hns3_nic_net_open(netdev); - - return ret; } static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type, @@ -1755,9 +1752,13 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hns3_get_dev_capability(pdev, ae_dev); pci_set_drvdata(pdev, ae_dev); - hnae3_register_ae_dev(ae_dev); + ret = hnae3_register_ae_dev(ae_dev); + if (ret) { + devm_kfree(&pdev->dev, ae_dev); + pci_set_drvdata(pdev, NULL); + } - return 0; + return ret; } /* hns3_remove - Device removal routine @@ -1771,6 +1772,7 @@ static void hns3_remove(struct pci_dev *pdev) hns3_disable_sriov(pdev); hnae3_unregister_ae_dev(ae_dev); + pci_set_drvdata(pdev, NULL); } /** @@ -2572,6 +2574,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring, struct sk_buff **out_skb) { struct net_device *netdev = ring->tqp->handle->kinfo.netdev; + enum hns3_pkt_l2t_type l2_frame_type; struct sk_buff *skb = ring->skb; struct hns3_desc_cb *desc_cb; struct hns3_desc *desc; @@ -2680,7 +2683,12 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring, return -EFAULT; } + l2_frame_type = hnae3_get_field(l234info, HNS3_RXD_DMAC_M, + HNS3_RXD_DMAC_S); u64_stats_update_begin(&ring->syncp); + if (l2_frame_type == HNS3_L2_TYPE_MULTICAST) + ring->stats.rx_multicast++; + ring->stats.rx_pkts++; ring->stats.rx_bytes += skb->len; u64_stats_update_end(&ring->syncp); @@ -3175,12 +3183,12 @@ static void hns3_clear_ring_group(struct hns3_enet_ring_group *group) group->count = 0; } -static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) +static void hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) { struct hnae3_ring_chain_node vector_ring_chain; struct hnae3_handle *h = priv->ae_handle; struct hns3_enet_tqp_vector *tqp_vector; - int i, ret; + int i; for (i = 0; i < priv->vector_num; i++) { tqp_vector = &priv->tqp_vector[i]; @@ -3188,15 +3196,10 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) if (!tqp_vector->rx_group.ring && !tqp_vector->tx_group.ring) continue; - ret = hns3_get_vector_ring_chain(tqp_vector, - &vector_ring_chain); - if (ret) - return ret; + hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain); - ret = h->ae_algo->ops->unmap_ring_from_vector(h, + h->ae_algo->ops->unmap_ring_from_vector(h, tqp_vector->vector_irq, &vector_ring_chain); - if (ret) - return ret; hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain); @@ -3212,8 +3215,6 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) hns3_clear_ring_group(&tqp_vector->tx_group); netif_napi_del(&priv->tqp_vector[i].napi); } - - return 0; } static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv) @@ -3378,6 +3379,11 @@ static void hns3_fini_ring(struct hns3_enet_ring *ring) ring->desc_cb = NULL; ring->next_to_clean = 0; ring->next_to_use = 0; + ring->pending_buf = 0; + if (ring->skb) { + dev_kfree_skb_any(ring->skb); + ring->skb = NULL; + } } static int hns3_buf_size2type(u32 buf_size) @@ -3518,6 +3524,25 @@ static int hns3_init_mac_addr(struct net_device *netdev, bool init) return ret; } +static int hns3_init_phy(struct net_device *netdev) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + int ret = 0; + + if (h->ae_algo->ops->mac_connect_phy) + ret = h->ae_algo->ops->mac_connect_phy(h); + + return ret; +} + +static void hns3_uninit_phy(struct net_device *netdev) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + + if (h->ae_algo->ops->mac_disconnect_phy) + h->ae_algo->ops->mac_disconnect_phy(h); +} + static int hns3_restore_fd_rules(struct net_device *netdev) { struct hnae3_handle *h = hns3_get_handle(netdev); @@ -3584,6 +3609,7 @@ static int hns3_client_init(struct hnae3_handle *handle) priv->netdev = netdev; priv->ae_handle = handle; priv->tx_timeout_count = 0; + set_bit(HNS3_NIC_STATE_DOWN, &priv->state); handle->kinfo.netdev = netdev; handle->priv = (void *)priv; @@ -3626,6 +3652,10 @@ static int hns3_client_init(struct hnae3_handle *handle) goto out_init_ring_data; } + ret = hns3_init_phy(netdev); + if (ret) + goto out_init_phy; + ret = register_netdev(netdev); if (ret) { dev_err(priv->dev, "probe register netdev fail!\n"); @@ -3650,8 +3680,11 @@ static int hns3_client_init(struct hnae3_handle *handle) return ret; out_reg_netdev_fail: + hns3_uninit_phy(netdev); +out_init_phy: + hns3_uninit_all_ring(priv); out_init_ring_data: - (void)hns3_nic_uninit_vector_data(priv); + hns3_nic_uninit_vector_data(priv); out_init_vector_data: hns3_nic_dealloc_vector_data(priv); out_alloc_vector_data: @@ -3684,9 +3717,9 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset) hns3_force_clear_all_rx_ring(handle); - ret = hns3_nic_uninit_vector_data(priv); - if (ret) - netdev_err(netdev, "uninit vector error\n"); + hns3_uninit_phy(netdev); + + hns3_nic_uninit_vector_data(priv); ret = hns3_nic_dealloc_vector_data(priv); if (ret) @@ -3728,7 +3761,6 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc) { struct hnae3_knic_private_info *kinfo = &handle->kinfo; struct net_device *ndev = kinfo->netdev; - int ret; if (tc > HNAE3_MAX_TC) return -EINVAL; @@ -3736,14 +3768,7 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc) if (!ndev) return -ENODEV; - ret = (kinfo->dcb_ops && kinfo->dcb_ops->map_update) ? - kinfo->dcb_ops->map_update(handle) : -EOPNOTSUPP; - if (ret) - return ret; - - ret = hns3_nic_set_real_num_queue(ndev); - - return ret; + return hns3_nic_set_real_num_queue(ndev); } static int hns3_recover_hw_addr(struct net_device *ndev) @@ -4087,11 +4112,7 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) hns3_force_clear_all_rx_ring(handle); - ret = hns3_nic_uninit_vector_data(priv); - if (ret) { - netdev_err(netdev, "uninit vector error\n"); - return ret; - } + hns3_nic_uninit_vector_data(priv); hns3_store_coal(priv); @@ -4144,6 +4165,7 @@ int hns3_set_channels(struct net_device *netdev, { struct hnae3_handle *h = hns3_get_handle(netdev); struct hnae3_knic_private_info *kinfo = &h->kinfo; + bool rxfh_configured = netif_is_rxfh_configured(netdev); u32 new_tqp_num = ch->combined_count; u16 org_tqp_num; int ret; @@ -4171,9 +4193,10 @@ int hns3_set_channels(struct net_device *netdev, return ret; org_tqp_num = h->kinfo.num_tqps; - ret = h->ae_algo->ops->set_channels(h, new_tqp_num); + ret = h->ae_algo->ops->set_channels(h, new_tqp_num, rxfh_configured); if (ret) { - ret = h->ae_algo->ops->set_channels(h, org_tqp_num); + ret = h->ae_algo->ops->set_channels(h, org_tqp_num, + rxfh_configured); if (ret) { /* If revert to old tqp failed, fatal error occurred */ dev_err(&netdev->dev, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index f59ab7387b1f..71ff8f4d6c18 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -202,6 +202,13 @@ enum hns3_nic_state { #define HNS3_RING_EN_B 0 +enum hns3_pkt_l2t_type { + HNS3_L2_TYPE_UNICAST, + HNS3_L2_TYPE_MULTICAST, + HNS3_L2_TYPE_BROADCAST, + HNS3_L2_TYPE_INVALID, +}; + enum hns3_pkt_l3t_type { HNS3_L3T_NONE, HNS3_L3T_IPV6, @@ -376,6 +383,7 @@ struct ring_stats { u64 err_bd_num; u64 l2_err; u64 l3l4_csum_err; + u64 rx_multicast; }; }; }; @@ -566,6 +574,7 @@ union l3_hdr_info { union l4_hdr_info { struct tcphdr *tcp; struct udphdr *udp; + struct gre_base_hdr *gre; unsigned char *hdr; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index e678b6939da3..63f5f56bda94 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -47,6 +47,7 @@ static const struct hns3_stats hns3_rxq_stats[] = { HNS3_TQP_STAT("err_bd_num", err_bd_num), HNS3_TQP_STAT("l2_err", l2_err), HNS3_TQP_STAT("l3l4_csum_err", l3l4_csum_err), + HNS3_TQP_STAT("multicast", rx_multicast), }; #define HNS3_RXQ_STATS_COUNT ARRAY_SIZE(hns3_rxq_stats) @@ -116,7 +117,7 @@ static int hns3_lp_up(struct net_device *ndev, enum hnae3_loop loop_mode) ret = hns3_lp_setup(ndev, loop_mode, true); usleep_range(10000, 20000); - return 0; + return ret; } static int hns3_lp_down(struct net_device *ndev, enum hnae3_loop loop_mode) @@ -333,10 +334,10 @@ static void hns3_self_test(struct net_device *ndev, continue; data[test_index] = hns3_lp_up(ndev, loop_type); - if (!data[test_index]) { + if (!data[test_index]) data[test_index] = hns3_lp_run_test(ndev, loop_type); - hns3_lp_down(ndev, loop_type); - } + + hns3_lp_down(ndev, loop_type); if (data[test_index]) eth_test->flags |= ETH_TEST_FL_FAILED; @@ -804,7 +805,7 @@ static int hns3_set_ringparam(struct net_device *ndev, old_desc_num, new_desc_num); if (if_running) - dev_close(ndev); + ndev->netdev_ops->ndo_stop(ndev); ret = hns3_uninit_all_ring(priv); if (ret) @@ -821,7 +822,7 @@ static int hns3_set_ringparam(struct net_device *ndev, } if (if_running) - ret = dev_open(ndev, NULL); + ret = ndev->netdev_ops->ndo_open(ndev); return ret; } @@ -1114,6 +1115,8 @@ static const struct ethtool_ops hns3vf_ethtool_ops = { .get_channels = hns3_get_channels, .get_coalesce = hns3_get_coalesce, .set_coalesce = hns3_set_coalesce, + .get_regs_len = hns3_get_regs_len, + .get_regs = hns3_get_regs, .get_link = hns3_get_link, }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index e483a6e730e6..81dbe1b6abb0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -170,8 +170,12 @@ static bool hclge_is_special_opcode(u16 opcode) /* these commands have several descriptors, * and use the first one to save opcode and return value */ - u16 spec_opcode[3] = {HCLGE_OPC_STATS_64_BIT, - HCLGE_OPC_STATS_32_BIT, HCLGE_OPC_STATS_MAC}; + u16 spec_opcode[] = {HCLGE_OPC_STATS_64_BIT, + HCLGE_OPC_STATS_32_BIT, + HCLGE_OPC_STATS_MAC, + HCLGE_OPC_STATS_MAC_ALL, + HCLGE_OPC_QUERY_32_BIT_REG, + HCLGE_OPC_QUERY_64_BIT_REG}; int i; for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) { @@ -259,6 +263,10 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) if (desc_ret == HCLGE_CMD_EXEC_SUCCESS) retval = 0; + else if (desc_ret == HCLGE_CMD_NO_AUTH) + retval = -EPERM; + else if (desc_ret == HCLGE_CMD_NOT_SUPPORTED) + retval = -EOPNOTSUPP; else retval = -EIO; hw->cmq.last_status = desc_ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index f23042b24c09..e26a25128693 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -39,7 +39,7 @@ struct hclge_cmq_ring { enum hclge_cmd_return_status { HCLGE_CMD_EXEC_SUCCESS = 0, HCLGE_CMD_NO_AUTH = 1, - HCLGE_CMD_NOT_EXEC = 2, + HCLGE_CMD_NOT_SUPPORTED = 2, HCLGE_CMD_QUEUE_FULL = 3, }; @@ -82,6 +82,8 @@ enum hclge_opcode_type { HCLGE_OPC_STATS_64_BIT = 0x0030, HCLGE_OPC_STATS_32_BIT = 0x0031, HCLGE_OPC_STATS_MAC = 0x0032, + HCLGE_OPC_QUERY_MAC_REG_NUM = 0x0033, + HCLGE_OPC_STATS_MAC_ALL = 0x0034, HCLGE_OPC_QUERY_REG_NUM = 0x0040, HCLGE_OPC_QUERY_32_BIT_REG = 0x0041, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index 4ec0b9cd15ae..1161361a973b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -93,13 +93,11 @@ static int hclge_dcb_common_validate(struct hclge_dev *hdev, u8 num_tc, } } - for (i = 0; i < hdev->num_alloc_vport; i++) { - if (num_tc > hdev->vport[i].alloc_tqps) { - dev_err(&hdev->pdev->dev, - "allocated tqp(%u) checking failed, %u > tqp(%u)\n", - i, num_tc, hdev->vport[i].alloc_tqps); - return -EINVAL; - } + if (num_tc > hdev->vport[0].alloc_tqps) { + dev_err(&hdev->pdev->dev, + "allocated tqp checking failed, %u > tqp(%u)\n", + num_tc, hdev->vport[0].alloc_tqps); + return -EINVAL; } return 0; @@ -156,21 +154,15 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, return 0; } -static int hclge_map_update(struct hnae3_handle *h) +static int hclge_map_update(struct hclge_dev *hdev) { - struct hclge_vport *vport = hclge_get_vport(h); - struct hclge_dev *hdev = vport->back; int ret; - ret = hclge_tm_map_cfg(hdev); + ret = hclge_tm_schd_setup_hw(hdev); if (ret) return ret; - ret = hclge_tm_schd_mode_hw(hdev); - if (ret) - return ret; - - ret = hclge_pause_setup_hw(hdev); + ret = hclge_pause_setup_hw(hdev, false); if (ret) return ret; @@ -236,12 +228,17 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) ret = hclge_ieee_ets_to_tm_info(hdev, ets); if (ret) - return ret; + goto err_out; if (map_changed) { + ret = hclge_map_update(hdev); + if (ret) + goto err_out; + ret = hclge_client_setup_tc(hdev); if (ret) - return ret; + goto err_out; + ret = hclge_notify_client(hdev, HNAE3_INIT_CLIENT); if (ret) return ret; @@ -252,6 +249,16 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) } return hclge_tm_dwrr_cfg(hdev); + +err_out: + if (!map_changed) + return ret; + + if (hclge_notify_client(hdev, HNAE3_INIT_CLIENT)) + return ret; + + hclge_notify_client(hdev, HNAE3_UP_CLIENT); + return ret; } static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) @@ -300,6 +307,9 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) return -EINVAL; + if (pfc->pfc_en == hdev->tm_info.pfc_en) + return 0; + prio_tc = hdev->tm_info.prio_tc; pfc_map = 0; @@ -312,12 +322,10 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) } } - if (pfc_map == hdev->tm_info.hw_pfc_map) - return 0; - hdev->tm_info.hw_pfc_map = pfc_map; + hdev->tm_info.pfc_en = pfc->pfc_en; - return hclge_pause_setup_hw(hdev); + return hclge_pause_setup_hw(hdev, false); } /* DCBX configuration */ @@ -362,12 +370,24 @@ static int hclge_setup_tc(struct hnae3_handle *h, u8 tc, u8 *prio_tc) if (ret) return -EINVAL; + ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT); + if (ret) + return ret; + + ret = hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT); + if (ret) + return ret; + hclge_tm_schd_info_update(hdev, tc); hclge_tm_prio_tc_info_update(hdev, prio_tc); - ret = hclge_tm_init_hw(hdev); + ret = hclge_tm_init_hw(hdev, false); if (ret) - return ret; + goto err_out; + + ret = hclge_client_setup_tc(hdev); + if (ret) + goto err_out; hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; @@ -376,7 +396,18 @@ static int hclge_setup_tc(struct hnae3_handle *h, u8 tc, u8 *prio_tc) else hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE; - return 0; + ret = hclge_notify_client(hdev, HNAE3_INIT_CLIENT); + if (ret) + return ret; + + return hclge_notify_client(hdev, HNAE3_UP_CLIENT); + +err_out: + if (hclge_notify_client(hdev, HNAE3_INIT_CLIENT)) + return ret; + + hclge_notify_client(hdev, HNAE3_UP_CLIENT); + return ret; } static const struct hnae3_dcb_ops hns3_dcb_ops = { @@ -386,7 +417,6 @@ static const struct hnae3_dcb_ops hns3_dcb_ops = { .ieee_setpfc = hclge_ieee_setpfc, .getdcbx = hclge_getdcbx, .setdcbx = hclge_setdcbx, - .map_update = hclge_map_update, .setup_tc = hclge_setup_tc, }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 00d7acb4d45a..2ffbf07ff829 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -118,6 +118,12 @@ static const struct hclge_comm_stats_str g_mac_stats_string[] = { HCLGE_MAC_STATS_FIELD_OFF(mac_tx_mac_pause_num)}, {"mac_rx_mac_pause_num", HCLGE_MAC_STATS_FIELD_OFF(mac_rx_mac_pause_num)}, + {"mac_tx_control_pkt_num", + HCLGE_MAC_STATS_FIELD_OFF(mac_tx_ctrl_pkt_num)}, + {"mac_rx_control_pkt_num", + HCLGE_MAC_STATS_FIELD_OFF(mac_rx_ctrl_pkt_num)}, + {"mac_tx_pfc_pkt_num", + HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pause_pkt_num)}, {"mac_tx_pfc_pri0_pkt_num", HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri0_pkt_num)}, {"mac_tx_pfc_pri1_pkt_num", @@ -134,6 +140,8 @@ static const struct hclge_comm_stats_str g_mac_stats_string[] = { HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri6_pkt_num)}, {"mac_tx_pfc_pri7_pkt_num", HCLGE_MAC_STATS_FIELD_OFF(mac_tx_pfc_pri7_pkt_num)}, + {"mac_rx_pfc_pkt_num", + HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pause_pkt_num)}, {"mac_rx_pfc_pri0_pkt_num", HCLGE_MAC_STATS_FIELD_OFF(mac_rx_pfc_pri0_pkt_num)}, {"mac_rx_pfc_pri1_pkt_num", @@ -287,10 +295,17 @@ static const struct hclge_mac_mgr_tbl_entry_cmd hclge_mgr_table[] = { }, }; -static int hclge_mac_update_stats(struct hclge_dev *hdev) +static const u8 hclge_hash_key[] = { + 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, + 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0, + 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4, + 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, + 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA +}; + +static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) { #define HCLGE_MAC_CMD_NUM 21 -#define HCLGE_RTN_DATA_NUM 4 u64 *data = (u64 *)(&hdev->hw_stats.mac_stats); struct hclge_desc desc[HCLGE_MAC_CMD_NUM]; @@ -308,15 +323,18 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev) } for (i = 0; i < HCLGE_MAC_CMD_NUM; i++) { + /* for special opcode 0032, only the first desc has the head */ if (unlikely(i == 0)) { desc_data = (__le64 *)(&desc[i].data[0]); - n = HCLGE_RTN_DATA_NUM - 2; + n = HCLGE_RD_FIRST_STATS_NUM; } else { desc_data = (__le64 *)(&desc[i]); - n = HCLGE_RTN_DATA_NUM; + n = HCLGE_RD_OTHER_STATS_NUM; } + for (k = 0; k < n; k++) { - *data++ += le64_to_cpu(*desc_data); + *data += le64_to_cpu(*desc_data); + data++; desc_data++; } } @@ -324,6 +342,83 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev) return 0; } +static int hclge_mac_update_stats_complete(struct hclge_dev *hdev, u32 desc_num) +{ + u64 *data = (u64 *)(&hdev->hw_stats.mac_stats); + struct hclge_desc *desc; + __le64 *desc_data; + u16 i, k, n; + int ret; + + desc = kcalloc(desc_num, sizeof(struct hclge_desc), GFP_KERNEL); + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_STATS_MAC_ALL, true); + ret = hclge_cmd_send(&hdev->hw, desc, desc_num); + if (ret) { + kfree(desc); + return ret; + } + + for (i = 0; i < desc_num; i++) { + /* for special opcode 0034, only the first desc has the head */ + if (i == 0) { + desc_data = (__le64 *)(&desc[i].data[0]); + n = HCLGE_RD_FIRST_STATS_NUM; + } else { + desc_data = (__le64 *)(&desc[i]); + n = HCLGE_RD_OTHER_STATS_NUM; + } + + for (k = 0; k < n; k++) { + *data += le64_to_cpu(*desc_data); + data++; + desc_data++; + } + } + + kfree(desc); + + return 0; +} + +static int hclge_mac_query_reg_num(struct hclge_dev *hdev, u32 *desc_num) +{ + struct hclge_desc desc; + __le32 *desc_data; + u32 reg_num; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_REG_NUM, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + return ret; + + desc_data = (__le32 *)(&desc.data[0]); + reg_num = le32_to_cpu(*desc_data); + + *desc_num = 1 + ((reg_num - 3) >> 2) + + (u32)(((reg_num - 3) & 0x3) ? 1 : 0); + + return 0; +} + +static int hclge_mac_update_stats(struct hclge_dev *hdev) +{ + u32 desc_num; + int ret; + + ret = hclge_mac_query_reg_num(hdev, &desc_num); + + /* The firmware supports the new statistics acquisition method */ + if (!ret) + ret = hclge_mac_update_stats_complete(hdev, desc_num); + else if (ret == -EOPNOTSUPP) + ret = hclge_mac_update_stats_defective(hdev); + else + dev_err(&hdev->pdev->dev, "query mac reg num fail!\n"); + + return ret; +} + static int hclge_tqps_update_stats(struct hnae3_handle *handle) { struct hnae3_knic_private_info *kinfo = &handle->kinfo; @@ -461,26 +556,6 @@ static u8 *hclge_comm_get_strings(u32 stringset, return (u8 *)buff; } -static void hclge_update_netstat(struct hclge_hw_stats *hw_stats, - struct net_device_stats *net_stats) -{ - net_stats->tx_dropped = 0; - net_stats->rx_errors = hw_stats->mac_stats.mac_rx_oversize_pkt_num; - net_stats->rx_errors += hw_stats->mac_stats.mac_rx_undersize_pkt_num; - net_stats->rx_errors += hw_stats->mac_stats.mac_rx_fcs_err_pkt_num; - - net_stats->multicast = hw_stats->mac_stats.mac_tx_multi_pkt_num; - net_stats->multicast += hw_stats->mac_stats.mac_rx_multi_pkt_num; - - net_stats->rx_crc_errors = hw_stats->mac_stats.mac_rx_fcs_err_pkt_num; - net_stats->rx_length_errors = - hw_stats->mac_stats.mac_rx_undersize_pkt_num; - net_stats->rx_length_errors += - hw_stats->mac_stats.mac_rx_oversize_pkt_num; - net_stats->rx_over_errors = - hw_stats->mac_stats.mac_rx_oversize_pkt_num; -} - static void hclge_update_stats_for_all(struct hclge_dev *hdev) { struct hnae3_handle *handle; @@ -500,8 +575,6 @@ static void hclge_update_stats_for_all(struct hclge_dev *hdev) if (status) dev_err(&hdev->pdev->dev, "Update MAC stats fail, status = %d.\n", status); - - hclge_update_netstat(&hdev->hw_stats, &handle->kinfo.netdev->stats); } static void hclge_update_stats(struct hnae3_handle *handle, @@ -509,7 +582,6 @@ static void hclge_update_stats(struct hnae3_handle *handle, { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - struct hclge_hw_stats *hw_stats = &hdev->hw_stats; int status; if (test_and_set_bit(HCLGE_STATE_STATISTICS_UPDATING, &hdev->state)) @@ -527,8 +599,6 @@ static void hclge_update_stats(struct hnae3_handle *handle, "Update TQPS stats fail, status = %d.\n", status); - hclge_update_netstat(hw_stats, net_stats); - clear_bit(HCLGE_STATE_STATISTICS_UPDATING, &hdev->state); } @@ -937,6 +1007,9 @@ static int hclge_configure(struct hclge_dev *hdev) hdev->tm_info.hw_pfc_map = 0; hdev->wanted_umv_size = cfg.umv_space; + if (hnae3_dev_fd_supported(hdev)) + hdev->fd_en = true; + ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed); if (ret) { dev_err(&hdev->pdev->dev, "Get wrong speed ret=%d.\n", ret); @@ -2105,7 +2178,9 @@ static int hclge_get_mac_phy_link(struct hclge_dev *hdev) static void hclge_update_link_status(struct hclge_dev *hdev) { + struct hnae3_client *rclient = hdev->roce_client; struct hnae3_client *client = hdev->nic_client; + struct hnae3_handle *rhandle; struct hnae3_handle *handle; int state; int i; @@ -2117,6 +2192,10 @@ static void hclge_update_link_status(struct hclge_dev *hdev) for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { handle = &hdev->vport[i].nic; client->ops->link_status_change(handle, state); + rhandle = &hdev->vport[i].roce; + if (rclient && rclient->ops->link_status_change) + rclient->ops->link_status_change(rhandle, + state); } hdev->hw.mac.link = state; } @@ -3584,8 +3663,11 @@ void hclge_rss_indir_init_cfg(struct hclge_dev *hdev) static void hclge_rss_init_cfg(struct hclge_dev *hdev) { + int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; struct hclge_vport *vport = hdev->vport; - int i; + + if (hdev->pdev->revision >= 0x21) + rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE; for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { vport[i].rss_tuple_sets.ipv4_tcp_en = @@ -3605,9 +3687,10 @@ static void hclge_rss_init_cfg(struct hclge_dev *hdev) vport[i].rss_tuple_sets.ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; + vport[i].rss_algo = rss_algo; - netdev_rss_key_fill(vport[i].rss_hash_key, HCLGE_RSS_KEY_SIZE); + memcpy(vport[i].rss_hash_key, hclge_hash_key, + HCLGE_RSS_KEY_SIZE); } hclge_rss_indir_init_cfg(hdev); @@ -3775,8 +3858,16 @@ static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc, struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; struct hclge_promisc_param param; + bool en_bc_pmc = true; - hclge_promisc_param_init(¶m, en_uc_pmc, en_mc_pmc, true, + /* For revision 0x20, if broadcast promisc enabled, vlan filter is + * always bypassed. So broadcast promisc should be disabled until + * user enable promisc mode + */ + if (handle->pdev->revision == 0x20) + en_bc_pmc = handle->netdev_flags & HNAE3_BPE ? true : false; + + hclge_promisc_param_init(¶m, en_uc_pmc, en_mc_pmc, en_bc_pmc, vport->vport_id); return hclge_cmd_set_promisc_mode(hdev, ¶m); } @@ -3885,7 +3976,6 @@ static int hclge_init_fd_config(struct hclge_dev *hdev) return -EOPNOTSUPP; } - hdev->fd_cfg.fd_en = true; hdev->fd_cfg.proto_support = TCP_V4_FLOW | UDP_V4_FLOW | SCTP_V4_FLOW | TCP_V6_FLOW | UDP_V6_FLOW | SCTP_V6_FLOW | IPV4_USER_FLOW | IPV6_USER_FLOW; @@ -4643,7 +4733,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, if (!hnae3_dev_fd_supported(hdev)) return -EOPNOTSUPP; - if (!hdev->fd_cfg.fd_en) { + if (!hdev->fd_en) { dev_warn(&hdev->pdev->dev, "Please enable flow director first\n"); return -EOPNOTSUPP; @@ -4796,7 +4886,7 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle) return 0; /* if fd is disabled, should not restore it when reset */ - if (!hdev->fd_cfg.fd_en) + if (!hdev->fd_en) return 0; hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { @@ -5082,7 +5172,7 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable) struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - hdev->fd_cfg.fd_en = enable; + hdev->fd_en = enable; if (!enable) hclge_del_all_fd_entries(handle, false); else @@ -5161,8 +5251,15 @@ static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en, { #define HCLGE_SERDES_RETRY_MS 10 #define HCLGE_SERDES_RETRY_NUM 100 + +#define HCLGE_MAC_LINK_STATUS_MS 20 +#define HCLGE_MAC_LINK_STATUS_NUM 10 +#define HCLGE_MAC_LINK_STATUS_DOWN 0 +#define HCLGE_MAC_LINK_STATUS_UP 1 + struct hclge_serdes_lb_cmd *req; struct hclge_desc desc; + int mac_link_ret = 0; int ret, i = 0; u8 loop_mode_b; @@ -5185,8 +5282,10 @@ static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en, if (en) { req->enable = loop_mode_b; req->mask = loop_mode_b; + mac_link_ret = HCLGE_MAC_LINK_STATUS_UP; } else { req->mask = loop_mode_b; + mac_link_ret = HCLGE_MAC_LINK_STATUS_DOWN; } ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -5218,7 +5317,19 @@ static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en, } hclge_cfg_mac_mode(hdev, en); - return 0; + + i = 0; + do { + /* serdes Internal loopback, independent of the network cable.*/ + msleep(HCLGE_MAC_LINK_STATUS_MS); + ret = hclge_get_mac_link_status(hdev); + if (ret == mac_link_ret) + return 0; + } while (++i < HCLGE_MAC_LINK_STATUS_NUM); + + dev_err(&hdev->pdev->dev, "config mac mode timeout\n"); + + return -EBUSY; } static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id, @@ -5264,6 +5375,9 @@ static int hclge_set_loopback(struct hnae3_handle *handle, break; } + if (ret) + return ret; + kinfo = &vport->nic.kinfo; for (i = 0; i < kinfo->num_tqps; i++) { ret = hclge_tqp_enable(hdev, i, 0, en); @@ -6950,16 +7064,6 @@ static void hclge_get_mdix_mode(struct hnae3_handle *handle, *tp_mdix = ETH_TP_MDI; } -static int hclge_init_instance_hw(struct hclge_dev *hdev) -{ - return hclge_mac_connect_phy(hdev); -} - -static void hclge_uninit_instance_hw(struct hclge_dev *hdev) -{ - hclge_mac_disconnect_phy(hdev); -} - static int hclge_init_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { @@ -6979,13 +7083,6 @@ static int hclge_init_client_instance(struct hnae3_client *client, if (ret) goto clear_nic; - ret = hclge_init_instance_hw(hdev); - if (ret) { - client->ops->uninit_instance(&vport->nic, - 0); - goto clear_nic; - } - hnae3_set_client_init_flag(client, ae_dev, 1); if (hdev->roce_client && @@ -7070,7 +7167,6 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, if (client->type == HNAE3_CLIENT_ROCE) return; if (hdev->nic_client && client->ops->uninit_instance) { - hclge_uninit_instance_hw(hdev); client->ops->uninit_instance(&vport->nic, 0); hdev->nic_client = NULL; vport->nic.client = NULL; @@ -7289,7 +7385,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) ret = hclge_init_umv_space(hdev); if (ret) { dev_err(&pdev->dev, "umv space init error, ret=%d.\n", ret); - goto err_msi_irq_uninit; + goto err_mdiobus_unreg; } ret = hclge_mac_init(hdev); @@ -7447,7 +7543,7 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } - ret = hclge_tm_init_hw(hdev); + ret = hclge_tm_init_hw(hdev, true); if (ret) { dev_err(&pdev->dev, "tm init hw fail, ret =%d\n", ret); return ret; @@ -7537,7 +7633,8 @@ static void hclge_get_tqps_and_rss_info(struct hnae3_handle *handle, *max_rss_size = hdev->rss_size_max; } -static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num) +static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, + bool rxfh_configured) { struct hclge_vport *vport = hclge_get_vport(handle); struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; @@ -7576,6 +7673,10 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num) if (ret) return ret; + /* RSS indirection table has been configuared by user */ + if (rxfh_configured) + goto out; + /* Reinitializes the rss indirect table according to the new RSS size */ rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL); if (!rss_indir) @@ -7591,6 +7692,7 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num) kfree(rss_indir); +out: if (!ret) dev_info(&hdev->pdev->dev, "Channels changed, rss_size from %d to %d, tqps from %d to %d", @@ -7970,6 +8072,8 @@ static const struct hnae3_ae_ops hclge_ops = { .set_gro_en = hclge_gro_en, .get_global_queue_id = hclge_covert_handle_qid_global, .set_timer_task = hclge_set_timer_task, + .mac_connect_phy = hclge_mac_connect_phy, + .mac_disconnect_phy = hclge_mac_disconnect_phy, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 279ed2d83cb8..c939f4a7f5f0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -16,6 +16,9 @@ #define HCLGE_MAX_PF_NUM 8 +#define HCLGE_RD_FIRST_STATS_NUM 2 +#define HCLGE_RD_OTHER_STATS_NUM 4 + #define HCLGE_INVALID_VPORT 0xffff #define HCLGE_PF_CFG_BLOCK_SIZE 32 @@ -322,6 +325,7 @@ struct hclge_tm_info { struct hclge_tc_info tc_info[HNAE3_MAX_TC]; enum hclge_fc_mode fc_mode; u8 hw_pfc_map; /* Allow for packet drop or not on this TC */ + u8 pfc_en; /* PFC enabled or not for user priority */ }; struct hclge_comm_stats_str { @@ -415,6 +419,10 @@ struct hclge_mac_stats { u64 mac_rx_fcs_err_pkt_num; u64 mac_rx_send_app_good_pkt_num; u64 mac_rx_send_app_bad_pkt_num; + u64 mac_tx_pfc_pause_pkt_num; + u64 mac_rx_pfc_pause_pkt_num; + u64 mac_tx_ctrl_pkt_num; + u64 mac_rx_ctrl_pkt_num; }; #define HCLGE_STATS_TIMER_INTERVAL (60 * 5) @@ -575,7 +583,6 @@ struct hclge_fd_key_cfg { struct hclge_fd_cfg { u8 fd_mode; - u8 fd_en; u16 max_key_length; u32 proto_support; u32 rule_num[2]; /* rule entry number */ @@ -750,6 +757,7 @@ struct hclge_dev { struct hclge_fd_cfg fd_cfg; struct hlist_head fd_rule_list; u16 hclge_fd_rule_num; + u8 fd_en; u16 wanted_umv_size; /* max available unicast mac vlan space */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index a1de451a85df..6afb0a4b73f7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -203,12 +203,11 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en, static int hclge_set_vf_promisc_mode(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *req) { - bool en_uc = req->msg[1] ? true : false; - bool en_mc = req->msg[2] ? true : false; + bool en_bc = req->msg[1] ? true : false; struct hclge_promisc_param param; - /* always enable broadcast promisc bit */ - hclge_promisc_param_init(¶m, en_uc, en_mc, true, vport->vport_id); + /* vf is not allowed to enable unicast/multicast broadcast */ + hclge_promisc_param_init(¶m, false, false, en_bc, vport->vport_id); return hclge_cmd_set_promisc_mode(vport->back, ¶m); } @@ -320,10 +319,14 @@ static int hclge_get_vf_tcinfo(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *mbx_req, bool gen_resp) { - struct hclge_dev *hdev = vport->back; - int ret; + struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; + u8 vf_tc_map = 0; + int i, ret; - ret = hclge_gen_resp_to_vf(vport, mbx_req, 0, &hdev->hw_tc_map, + for (i = 0; i < kinfo->num_tc; i++) + vf_tc_map |= BIT(i); + + ret = hclge_gen_resp_to_vf(vport, mbx_req, 0, &vf_tc_map, sizeof(u8)); return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index dabb8437f8dc..84f28785ba28 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -195,8 +195,10 @@ static void hclge_mac_adjust_link(struct net_device *netdev) netdev_err(netdev, "failed to configure flow control.\n"); } -int hclge_mac_connect_phy(struct hclge_dev *hdev) +int hclge_mac_connect_phy(struct hnae3_handle *handle) { + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; struct net_device *netdev = hdev->vport[0].nic.netdev; struct phy_device *phydev = hdev->hw.mac.phydev; __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; @@ -229,8 +231,10 @@ int hclge_mac_connect_phy(struct hclge_dev *hdev) return 0; } -void hclge_mac_disconnect_phy(struct hclge_dev *hdev) +void hclge_mac_disconnect_phy(struct hnae3_handle *handle) { + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; struct phy_device *phydev = hdev->hw.mac.phydev; if (!phydev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h index 5fbf7dddb5d9..ef095d9c566f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h @@ -5,8 +5,8 @@ #define __HCLGE_MDIO_H int hclge_mac_mdio_config(struct hclge_dev *hdev); -int hclge_mac_connect_phy(struct hclge_dev *hdev); -void hclge_mac_disconnect_phy(struct hclge_dev *hdev); +int hclge_mac_connect_phy(struct hnae3_handle *handle); +void hclge_mac_disconnect_phy(struct hnae3_handle *handle); void hclge_mac_start_phy(struct hclge_dev *hdev); void hclge_mac_stop_phy(struct hclge_dev *hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index fb8596a3e5e4..aafc69f4bfdd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -520,8 +520,14 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) u16 max_rss_size; u8 i; - vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit; - kinfo->num_tc = min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc); + /* TC configuration is shared by PF/VF in one port, only allow + * one tc for VF for simplicity. VF's vport_id is non zero. + */ + kinfo->num_tc = vport->vport_id ? 1 : + min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc); + vport->qs_offset = (vport->vport_id ? hdev->tm_info.num_tc : 0) + + (vport->vport_id ? (vport->vport_id - 1) : 0); + max_rss_size = min_t(u16, hdev->rss_size_max, vport->alloc_tqps / kinfo->num_tc); @@ -538,12 +544,12 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) } kinfo->num_tqps = kinfo->num_tc * kinfo->rss_size; - vport->qs_offset = hdev->tm_info.num_tc * vport->vport_id; vport->dwrr = 100; /* 100 percent as init */ vport->alloc_rss_size = kinfo->rss_size; + vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit; for (i = 0; i < HNAE3_MAX_TC; i++) { - if (hdev->hw_tc_map & BIT(i)) { + if (hdev->hw_tc_map & BIT(i) && i < kinfo->num_tc) { kinfo->tc_info[i].enable = true; kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size; kinfo->tc_info[i].tqp_count = kinfo->rss_size; @@ -766,13 +772,17 @@ static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev) if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) { /* Cfg qs -> pri mapping, one by one mapping */ - for (k = 0; k < hdev->num_alloc_vport; k++) - for (i = 0; i < hdev->tm_info.num_tc; i++) { + for (k = 0; k < hdev->num_alloc_vport; k++) { + struct hnae3_knic_private_info *kinfo = + &vport[k].nic.kinfo; + + for (i = 0; i < kinfo->num_tc; i++) { ret = hclge_tm_qs_to_pri_map_cfg( hdev, vport[k].qs_offset + i, i); if (ret) return ret; } + } } else if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE) { /* Cfg qs -> pri mapping, qs = tc, pri = vf, 8 qs -> 1 pri */ for (k = 0; k < hdev->num_alloc_vport; k++) @@ -947,6 +957,36 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev) return 0; } +static int hclge_tm_ets_tc_dwrr_cfg(struct hclge_dev *hdev) +{ +#define DEFAULT_TC_WEIGHT 1 +#define DEFAULT_TC_OFFSET 14 + + struct hclge_ets_tc_weight_cmd *ets_weight; + struct hclge_desc desc; + int i; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ETS_TC_WEIGHT, false); + ets_weight = (struct hclge_ets_tc_weight_cmd *)desc.data; + + for (i = 0; i < HNAE3_MAX_TC; i++) { + struct hclge_pg_info *pg_info; + + ets_weight->tc_weight[i] = DEFAULT_TC_WEIGHT; + + if (!(hdev->hw_tc_map & BIT(i))) + continue; + + pg_info = + &hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid]; + ets_weight->tc_weight[i] = pg_info->tc_dwrr[i]; + } + + ets_weight->weight_offset = DEFAULT_TC_OFFSET; + + return hclge_cmd_send(&hdev->hw, &desc, 1); +} + static int hclge_tm_pri_vnet_base_dwrr_pri_cfg(struct hclge_vport *vport) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; @@ -996,6 +1036,19 @@ static int hclge_tm_pri_dwrr_cfg(struct hclge_dev *hdev) ret = hclge_tm_pri_tc_base_dwrr_cfg(hdev); if (ret) return ret; + + if (!hnae3_dev_dcb_supported(hdev)) + return 0; + + ret = hclge_tm_ets_tc_dwrr_cfg(hdev); + if (ret == -EOPNOTSUPP) { + dev_warn(&hdev->pdev->dev, + "fw %08x does't support ets tc weight cmd\n", + hdev->fw_version); + ret = 0; + } + + return ret; } else { ret = hclge_tm_pri_vnet_base_dwrr_cfg(hdev); if (ret) @@ -1005,7 +1058,7 @@ static int hclge_tm_pri_dwrr_cfg(struct hclge_dev *hdev) return 0; } -int hclge_tm_map_cfg(struct hclge_dev *hdev) +static int hclge_tm_map_cfg(struct hclge_dev *hdev) { int ret; @@ -1120,7 +1173,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev) return 0; } -int hclge_tm_schd_mode_hw(struct hclge_dev *hdev) +static int hclge_tm_schd_mode_hw(struct hclge_dev *hdev) { int ret; @@ -1131,7 +1184,7 @@ int hclge_tm_schd_mode_hw(struct hclge_dev *hdev) return hclge_tm_lvl34_schd_mode_cfg(hdev); } -static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev) +int hclge_tm_schd_setup_hw(struct hclge_dev *hdev) { int ret; @@ -1172,7 +1225,7 @@ static int hclge_pfc_setup_hw(struct hclge_dev *hdev) HCLGE_RX_MAC_PAUSE_EN_MSK; return hclge_pfc_pause_en_cfg(hdev, enable_bitmap, - hdev->tm_info.hw_pfc_map); + hdev->tm_info.pfc_en); } /* Each Tc has a 1024 queue sets to backpress, it divides to @@ -1255,7 +1308,7 @@ static int hclge_tm_bp_setup(struct hclge_dev *hdev) return ret; } -int hclge_pause_setup_hw(struct hclge_dev *hdev) +int hclge_pause_setup_hw(struct hclge_dev *hdev, bool init) { int ret; @@ -1271,10 +1324,15 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev) if (!hnae3_dev_dcb_supported(hdev)) return 0; - /* When MAC is GE Mode, hdev does not support pfc setting */ + /* GE MAC does not support PFC, when driver is initializing and MAC + * is in GE Mode, ignore the error here, otherwise initialization + * will fail. + */ ret = hclge_pfc_setup_hw(hdev); - if (ret) - dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret); + if (init && ret == -EOPNOTSUPP) + dev_warn(&hdev->pdev->dev, "GE MAC does not support pfc\n"); + else + return ret; return hclge_tm_bp_setup(hdev); } @@ -1314,7 +1372,7 @@ void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc) hclge_tm_schd_info_init(hdev); } -int hclge_tm_init_hw(struct hclge_dev *hdev) +int hclge_tm_init_hw(struct hclge_dev *hdev, bool init) { int ret; @@ -1326,7 +1384,7 @@ int hclge_tm_init_hw(struct hclge_dev *hdev) if (ret) return ret; - ret = hclge_pause_setup_hw(hdev); + ret = hclge_pause_setup_hw(hdev, init); if (ret) return ret; @@ -1345,7 +1403,7 @@ int hclge_tm_schd_init(struct hclge_dev *hdev) if (ret) return ret; - return hclge_tm_init_hw(hdev); + return hclge_tm_init_hw(hdev, true); } int hclge_tm_vport_map_update(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 898163c4d400..f60e540c7a62 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -143,13 +143,12 @@ struct hclge_port_shapping_cmd { int hclge_tm_schd_init(struct hclge_dev *hdev); int hclge_tm_vport_map_update(struct hclge_dev *hdev); -int hclge_pause_setup_hw(struct hclge_dev *hdev); -int hclge_tm_schd_mode_hw(struct hclge_dev *hdev); +int hclge_pause_setup_hw(struct hclge_dev *hdev, bool init); +int hclge_tm_schd_setup_hw(struct hclge_dev *hdev); void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc); void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc); int hclge_tm_dwrr_cfg(struct hclge_dev *hdev); -int hclge_tm_map_cfg(struct hclge_dev *hdev); -int hclge_tm_init_hw(struct hclge_dev *hdev); +int hclge_tm_init_hw(struct hclge_dev *hdev, bool init); int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx); int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr); int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index bb9f45200ef5..b9cdbd5dd6cb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -21,6 +21,14 @@ static const struct pci_device_id ae_algovf_pci_tbl[] = { {0, } }; +static const u8 hclgevf_hash_key[] = { + 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, + 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0, + 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4, + 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, + 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA +}; + MODULE_DEVICE_TABLE(pci, ae_algovf_pci_tbl); static const u32 cmdq_reg_addr_list[] = {HCLGEVF_CMDQ_TX_ADDR_L_REG, @@ -78,7 +86,12 @@ static const u32 tqp_intr_reg_addr_list[] = {HCLGEVF_TQP_INTR_CTRL_REG, static inline struct hclgevf_dev *hclgevf_ae_get_hdev( struct hnae3_handle *handle) { - return container_of(handle, struct hclgevf_dev, nic); + if (!handle->client) + return container_of(handle, struct hclgevf_dev, nic); + else if (handle->client->type == HNAE3_CLIENT_ROCE) + return container_of(handle, struct hclgevf_dev, roce); + else + return container_of(handle, struct hclgevf_dev, nic); } static int hclgevf_tqps_update_stats(struct hnae3_handle *handle) @@ -349,16 +362,21 @@ static void hclgevf_request_link_info(struct hclgevf_dev *hdev) void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) { + struct hnae3_handle *rhandle = &hdev->roce; struct hnae3_handle *handle = &hdev->nic; + struct hnae3_client *rclient; struct hnae3_client *client; client = handle->client; + rclient = hdev->roce_client; link_state = test_bit(HCLGEVF_STATE_DOWN, &hdev->state) ? 0 : link_state; if (link_state != hdev->hw.mac.link) { client->ops->link_status_change(handle, !!link_state); + if (rclient && rclient->ops->link_status_change) + rclient->ops->link_status_change(rhandle, !!link_state); hdev->hw.mac.link = link_state; } } @@ -964,33 +982,29 @@ static int hclgevf_put_vector(struct hnae3_handle *handle, int vector) } static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev, - bool en_uc_pmc, bool en_mc_pmc) + bool en_bc_pmc) { struct hclge_mbx_vf_to_pf_cmd *req; struct hclgevf_desc desc; - int status; + int ret; req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data; hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false); req->msg[0] = HCLGE_MBX_SET_PROMISC_MODE; - req->msg[1] = en_uc_pmc ? 1 : 0; - req->msg[2] = en_mc_pmc ? 1 : 0; + req->msg[1] = en_bc_pmc ? 1 : 0; - status = hclgevf_cmd_send(&hdev->hw, &desc, 1); - if (status) + ret = hclgevf_cmd_send(&hdev->hw, &desc, 1); + if (ret) dev_err(&hdev->pdev->dev, - "Set promisc mode fail, status is %d.\n", status); + "Set promisc mode fail, status is %d.\n", ret); - return status; + return ret; } -static int hclgevf_set_promisc_mode(struct hnae3_handle *handle, - bool en_uc_pmc, bool en_mc_pmc) +static int hclgevf_set_promisc_mode(struct hclgevf_dev *hdev, bool en_bc_pmc) { - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - - return hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc); + return hclgevf_cmd_set_promisc_mode(hdev, en_bc_pmc); } static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, int tqp_id, @@ -1610,6 +1624,10 @@ static void hclgevf_keep_alive_task(struct work_struct *work) int ret; hdev = container_of(work, struct hclgevf_dev, keep_alive_task); + + if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + return; + ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_KEEP_ALIVE, 0, NULL, 0, false, &respmsg, sizeof(u8)); if (ret) @@ -1788,9 +1806,9 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) rss_cfg->rss_size = hdev->rss_size_max; if (hdev->pdev->revision >= 0x21) { - rss_cfg->hash_algo = HCLGEVF_RSS_HASH_ALGO_TOEPLITZ; - netdev_rss_key_fill(rss_cfg->rss_hash_key, - HCLGEVF_RSS_KEY_SIZE); + rss_cfg->hash_algo = HCLGEVF_RSS_HASH_ALGO_SIMPLE; + memcpy(rss_cfg->rss_hash_key, hclgevf_hash_key, + HCLGEVF_RSS_KEY_SIZE); ret = hclgevf_set_rss_algo_key(hdev, rss_cfg->hash_algo, rss_cfg->rss_hash_key); @@ -2377,6 +2395,15 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) if (ret) goto err_config; + /* vf is not allowed to enable unicast/multicast promisc mode. + * For revision 0x20, default to disable broadcast promisc mode, + * firmware makes sure broadcast packets can be accepted. + * For revision 0x21, default to enable broadcast promisc mode. + */ + ret = hclgevf_set_promisc_mode(hdev, true); + if (ret) + goto err_config; + /* Initialize RSS for this VF */ ret = hclgevf_rss_init_hw(hdev); if (ret) { @@ -2461,7 +2488,8 @@ static u32 hclgevf_get_max_channels(struct hclgevf_dev *hdev) struct hnae3_handle *nic = &hdev->nic; struct hnae3_knic_private_info *kinfo = &nic->kinfo; - return min_t(u32, hdev->rss_size_max * kinfo->num_tc, hdev->num_tqps); + return min_t(u32, hdev->rss_size_max, + hdev->num_tqps / kinfo->num_tc); } /** @@ -2482,7 +2510,7 @@ static void hclgevf_get_channels(struct hnae3_handle *handle, ch->max_combined = hclgevf_get_max_channels(hdev); ch->other_count = 0; ch->max_other = 0; - ch->combined_count = hdev->num_tqps; + ch->combined_count = handle->kinfo.rss_size; } static void hclgevf_get_tqps_and_rss_info(struct hnae3_handle *handle, @@ -2640,7 +2668,6 @@ static const struct hnae3_ae_ops hclgevf_ops = { .get_vector = hclgevf_get_vector, .put_vector = hclgevf_put_vector, .reset_queue = hclgevf_reset_tqp, - .set_promisc_mode = hclgevf_set_promisc_mode, .get_mac_addr = hclgevf_get_mac_addr, .set_mac_addr = hclgevf_set_mac_addr, .add_uc_addr = hclgevf_add_uc_addr, diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 017e08452d8c..baf5cc251f32 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -321,7 +321,7 @@ static int hns_mdio_read(struct mii_bus *bus, int phy_id, int regnum) } hns_mdio_cmd_write(mdio_dev, is_c45, - MDIO_C45_WRITE_ADDR, phy_id, devad); + MDIO_C45_READ, phy_id, devad); } /* Step 5: waitting for MDIO_COMMAND_REG 's mdio_start==0,*/ diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index da323b9e1f62..e64bc664f687 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -51,9 +51,10 @@ static unsigned int rx_weight = 64; module_param(rx_weight, uint, 0644); MODULE_PARM_DESC(rx_weight, "Number Rx packets for NAPI budget (default=64)"); -#define HINIC_DEV_ID_QUAD_PORT_25GE 0x1822 -#define HINIC_DEV_ID_DUAL_PORT_25GE 0x0200 -#define HINIC_DEV_ID_DUAL_PORT_100GE 0x0201 +#define HINIC_DEV_ID_QUAD_PORT_25GE 0x1822 +#define HINIC_DEV_ID_DUAL_PORT_100GE 0x0200 +#define HINIC_DEV_ID_DUAL_PORT_100GE_MEZZ 0x0205 +#define HINIC_DEV_ID_QUAD_PORT_25GE_MEZZ 0x0210 #define HINIC_WQ_NAME "hinic_dev" @@ -1113,8 +1114,9 @@ static void hinic_shutdown(struct pci_dev *pdev) static const struct pci_device_id hinic_pci_table[] = { { PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_QUAD_PORT_25GE), 0}, - { PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_DUAL_PORT_25GE), 0}, { PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_DUAL_PORT_100GE), 0}, + { PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_DUAL_PORT_100GE_MEZZ), 0}, + { PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_QUAD_PORT_25GE_MEZZ), 0}, { 0, 0} }; MODULE_DEVICE_TABLE(pci, hinic_pci_table); diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index d719668a6684..92929750f832 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -1310,7 +1310,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) dev->stats.tx_aborted_errors++; } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); tx_cmd->cmd.command = 0; /* Mark free */ break; diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 098d8764c0ea..dd71d5db7274 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1313,7 +1313,6 @@ static int ibmveth_poll(struct napi_struct *napi, int budget) unsigned long lpar_rc; u16 mss = 0; -restart_poll: while (frames_processed < budget) { if (!ibmveth_rxq_pending_buffer(adapter)) break; @@ -1401,7 +1400,6 @@ restart_poll: napi_reschedule(napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); - goto restart_poll; } } diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 8de9085bba9e..d684998ba2b0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -34,6 +34,7 @@ #include #include #include +#include #include "i40e_type.h" #include "i40e_prototype.h" #include "i40e_client.h" @@ -523,6 +524,8 @@ struct i40e_pf { #define I40E_FLAG_FD_SB_INACTIVE BIT(22) #define I40E_FLAG_FD_SB_TO_CLOUD_FILTER BIT(23) #define I40E_FLAG_DISABLE_FW_LLDP BIT(24) +#define I40E_FLAG_RS_FEC BIT(25) +#define I40E_FLAG_BASE_R_FEC BIT(26) struct i40e_client_instance *cinst; bool stat_offsets_loaded; @@ -787,11 +790,6 @@ struct i40e_vsi { /* VSI specific handlers */ irqreturn_t (*irq_handler)(int irq, void *data); - - /* AF_XDP zero-copy */ - struct xdp_umem **xsk_umems; - u16 num_xsk_umems_used; - u16 num_xsk_umems; } ____cacheline_internodealigned_in_smp; struct i40e_netdev_priv { @@ -1091,6 +1089,8 @@ i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf); i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf); void i40e_print_link_message(struct i40e_vsi *vsi, bool isup); +void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags); + static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) { return !!vsi->xdp_prog; @@ -1104,10 +1104,10 @@ static inline struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring) if (ring_is_xdp(ring)) qid -= ring->vsi->alloc_queue_pairs; - if (!ring->vsi->xsk_umems || !ring->vsi->xsk_umems[qid] || !xdp_on) + if (!xdp_on) return NULL; - return ring->vsi->xsk_umems[qid]; + return xdp_get_umem_from_qid(ring->vsi->netdev, qid); } int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index a20d1cf058ad..c67d485d6f99 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1642,30 +1642,7 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, count = buf_tmp - i40e_dbg_netdev_ops_buf + 1; } - if (strncmp(i40e_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) { - cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i", &vsi_seid); - if (cnt != 1) { - dev_info(&pf->pdev->dev, "tx_timeout \n"); - goto netdev_ops_write_done; - } - vsi = i40e_dbg_find_vsi(pf, vsi_seid); - if (!vsi) { - dev_info(&pf->pdev->dev, - "tx_timeout: VSI %d not found\n", vsi_seid); - } else if (!vsi->netdev) { - dev_info(&pf->pdev->dev, "tx_timeout: no netdev for VSI %d\n", - vsi_seid); - } else if (test_bit(__I40E_VSI_DOWN, vsi->state)) { - dev_info(&pf->pdev->dev, "tx_timeout: VSI %d not UP\n", - vsi_seid); - } else if (rtnl_trylock()) { - vsi->netdev->netdev_ops->ndo_tx_timeout(vsi->netdev); - rtnl_unlock(); - dev_info(&pf->pdev->dev, "tx_timeout called\n"); - } else { - dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n"); - } - } else if (strncmp(i40e_dbg_netdev_ops_buf, "change_mtu", 10) == 0) { + if (strncmp(i40e_dbg_netdev_ops_buf, "change_mtu", 10) == 0) { int mtu; cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i %i", @@ -1733,7 +1710,6 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, dev_info(&pf->pdev->dev, "unknown command '%s'\n", i40e_dbg_netdev_ops_buf); dev_info(&pf->pdev->dev, "available commands\n"); - dev_info(&pf->pdev->dev, " tx_timeout \n"); dev_info(&pf->pdev->dev, " change_mtu \n"); dev_info(&pf->pdev->dev, " set_rx_mode \n"); dev_info(&pf->pdev->dev, " napi \n"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index a6bc7847346b..4c885801fa26 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -438,6 +438,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = { I40E_PRIV_FLAG("disable-source-pruning", I40E_FLAG_SOURCE_PRUNING_DISABLED, 0), I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_DISABLE_FW_LLDP, 0), + I40E_PRIV_FLAG("rs-fec", I40E_FLAG_RS_FEC, 0), + I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0), }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags) @@ -606,6 +608,24 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, ethtool_link_ksettings_add_link_mode(ks, advertising, 25000baseCR_Full); } + if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_AOC || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_ACC) { + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB) { + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_NONE); + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_RS); + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_BASER); + } + } /* need to add new 10G PHY types */ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 || phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) { @@ -721,6 +741,13 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, 25000baseSR_Full); ethtool_link_ksettings_add_link_mode(ks, advertising, 25000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER); + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE); + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_BASER); ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseSR_Full); ethtool_link_ksettings_add_link_mode(ks, advertising, @@ -825,6 +852,9 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, 40000baseKR4_Full); ethtool_link_ksettings_add_link_mode(ks, supported, 25000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER); ethtool_link_ksettings_add_link_mode(ks, supported, 20000baseKR2_Full); ethtool_link_ksettings_add_link_mode(ks, supported, @@ -838,6 +868,10 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, 40000baseKR4_Full); ethtool_link_ksettings_add_link_mode(ks, advertising, 25000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE); + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_BASER); ethtool_link_ksettings_add_link_mode(ks, advertising, 20000baseKR2_Full); ethtool_link_ksettings_add_link_mode(ks, advertising, @@ -855,6 +889,13 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, 25000baseCR_Full); ethtool_link_ksettings_add_link_mode(ks, advertising, 25000baseCR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER); + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE); + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_BASER); break; case I40E_PHY_TYPE_25GBASE_AOC: case I40E_PHY_TYPE_25GBASE_ACC: @@ -862,9 +903,15 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); ethtool_link_ksettings_add_link_mode(ks, supported, 25000baseCR_Full); - ethtool_link_ksettings_add_link_mode(ks, advertising, 25000baseCR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER); + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE); + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_BASER); ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseCR_Full); ethtool_link_ksettings_add_link_mode(ks, advertising, @@ -1261,6 +1308,154 @@ done: return err; } +static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_aq_get_phy_abilities_resp abilities; + struct i40e_pf *pf = np->vsi->back; + struct i40e_hw *hw = &pf->hw; + i40e_status status = 0; + u32 flags = 0; + int err = 0; + + flags = READ_ONCE(pf->flags); + i40e_set_fec_in_flags(fec_cfg, &flags); + + /* Get the current phy config */ + memset(&abilities, 0, sizeof(abilities)); + status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, + NULL); + if (status) { + err = -EAGAIN; + goto done; + } + + if (abilities.fec_cfg_curr_mod_ext_info != fec_cfg) { + struct i40e_aq_set_phy_config config; + + memset(&config, 0, sizeof(config)); + config.phy_type = abilities.phy_type; + config.abilities = abilities.abilities; + config.phy_type_ext = abilities.phy_type_ext; + config.link_speed = abilities.link_speed; + config.eee_capability = abilities.eee_capability; + config.eeer = abilities.eeer_val; + config.low_power_ctrl = abilities.d3_lpan; + config.fec_config = fec_cfg & I40E_AQ_PHY_FEC_CONFIG_MASK; + status = i40e_aq_set_phy_config(hw, &config, NULL); + if (status) { + netdev_info(netdev, + "Set phy config failed, err %s aq_err %s\n", + i40e_stat_str(hw, status), + i40e_aq_str(hw, hw->aq.asq_last_status)); + err = -EAGAIN; + goto done; + } + pf->flags = flags; + status = i40e_update_link_info(hw); + if (status) + /* debug level message only due to relation to the link + * itself rather than to the FEC settings + * (e.g. no physical connection etc.) + */ + netdev_dbg(netdev, + "Updating link info failed with err %s aq_err %s\n", + i40e_stat_str(hw, status), + i40e_aq_str(hw, hw->aq.asq_last_status)); + } + +done: + return err; +} + +static int i40e_get_fec_param(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_aq_get_phy_abilities_resp abilities; + struct i40e_pf *pf = np->vsi->back; + struct i40e_hw *hw = &pf->hw; + i40e_status status = 0; + int err = 0; + + /* Get the current phy config */ + memset(&abilities, 0, sizeof(abilities)); + status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, + NULL); + if (status) { + err = -EAGAIN; + goto done; + } + + fecparam->fec = 0; + if (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_AUTO) + fecparam->fec |= ETHTOOL_FEC_AUTO; + if ((abilities.fec_cfg_curr_mod_ext_info & + I40E_AQ_SET_FEC_REQUEST_RS) || + (abilities.fec_cfg_curr_mod_ext_info & + I40E_AQ_SET_FEC_ABILITY_RS)) + fecparam->fec |= ETHTOOL_FEC_RS; + if ((abilities.fec_cfg_curr_mod_ext_info & + I40E_AQ_SET_FEC_REQUEST_KR) || + (abilities.fec_cfg_curr_mod_ext_info & I40E_AQ_SET_FEC_ABILITY_KR)) + fecparam->fec |= ETHTOOL_FEC_BASER; + if (abilities.fec_cfg_curr_mod_ext_info == 0) + fecparam->fec |= ETHTOOL_FEC_OFF; + + if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) + fecparam->active_fec = ETHTOOL_FEC_BASER; + else if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA) + fecparam->active_fec = ETHTOOL_FEC_RS; + else + fecparam->active_fec = ETHTOOL_FEC_OFF; +done: + return err; +} + +static int i40e_set_fec_param(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_pf *pf = np->vsi->back; + struct i40e_hw *hw = &pf->hw; + u8 fec_cfg = 0; + int err = 0; + + if (hw->device_id != I40E_DEV_ID_25G_SFP28 && + hw->device_id != I40E_DEV_ID_25G_B) { + err = -EPERM; + goto done; + } + + switch (fecparam->fec) { + case ETHTOOL_FEC_AUTO: + fec_cfg = I40E_AQ_SET_FEC_AUTO; + break; + case ETHTOOL_FEC_RS: + fec_cfg = (I40E_AQ_SET_FEC_REQUEST_RS | + I40E_AQ_SET_FEC_ABILITY_RS); + break; + case ETHTOOL_FEC_BASER: + fec_cfg = (I40E_AQ_SET_FEC_REQUEST_KR | + I40E_AQ_SET_FEC_ABILITY_KR); + break; + case ETHTOOL_FEC_OFF: + case ETHTOOL_FEC_NONE: + fec_cfg = 0; + break; + default: + dev_warn(&pf->pdev->dev, "Unsupported FEC mode: %d", + fecparam->fec); + err = -EINVAL; + goto done; + } + + err = i40e_set_fec_cfg(netdev, fec_cfg); + +done: + return err; +} + static int i40e_nway_reset(struct net_device *netdev) { /* restart autonegotiation */ @@ -1376,7 +1571,7 @@ static int i40e_set_pauseparam(struct net_device *netdev, else if (!pause->rx_pause && !pause->tx_pause) hw->fc.requested_mode = I40E_FC_NONE; else - return -EINVAL; + return -EINVAL; /* Tell the OS link is going down, the link will go back up when fw * says it is ready asynchronously @@ -2175,7 +2370,7 @@ static int i40e_get_ts_info(struct net_device *dev, return 0; } -static int i40e_link_test(struct net_device *netdev, u64 *data) +static u64 i40e_link_test(struct net_device *netdev, u64 *data) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; @@ -2198,7 +2393,7 @@ static int i40e_link_test(struct net_device *netdev, u64 *data) return *data; } -static int i40e_reg_test(struct net_device *netdev, u64 *data) +static u64 i40e_reg_test(struct net_device *netdev, u64 *data) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; @@ -2209,7 +2404,7 @@ static int i40e_reg_test(struct net_device *netdev, u64 *data) return *data; } -static int i40e_eeprom_test(struct net_device *netdev, u64 *data) +static u64 i40e_eeprom_test(struct net_device *netdev, u64 *data) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; @@ -2223,7 +2418,7 @@ static int i40e_eeprom_test(struct net_device *netdev, u64 *data) return *data; } -static int i40e_intr_test(struct net_device *netdev, u64 *data) +static u64 i40e_intr_test(struct net_device *netdev, u64 *data) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; @@ -2440,10 +2635,10 @@ static int i40e_set_phys_id(struct net_device *netdev, default: break; } - if (ret) - return -ENOENT; - else - return 0; + if (ret) + return -ENOENT; + else + return 0; } /* NOTE: i40e hardware uses a conversion factor of 2 for Interrupt @@ -4676,6 +4871,15 @@ flags_complete: } } + if (((changed_flags & I40E_FLAG_RS_FEC) || + (changed_flags & I40E_FLAG_BASE_R_FEC)) && + pf->hw.device_id != I40E_DEV_ID_25G_SFP28 && + pf->hw.device_id != I40E_DEV_ID_25G_B) { + dev_warn(&pf->pdev->dev, + "Device does not support changing FEC configuration\n"); + return -EOPNOTSUPP; + } + /* Now that we've checked to ensure that the new flags are valid, load * them into place. Since we only modify flags either (a) during * initialization or (b) while holding the RTNL lock, we don't need @@ -4714,6 +4918,24 @@ flags_complete: } } + if ((changed_flags & I40E_FLAG_RS_FEC) || + (changed_flags & I40E_FLAG_BASE_R_FEC)) { + u8 fec_cfg = 0; + + if (pf->flags & I40E_FLAG_RS_FEC && + pf->flags & I40E_FLAG_BASE_R_FEC) { + fec_cfg = I40E_AQ_SET_FEC_AUTO; + } else if (pf->flags & I40E_FLAG_RS_FEC) { + fec_cfg = (I40E_AQ_SET_FEC_REQUEST_RS | + I40E_AQ_SET_FEC_ABILITY_RS); + } else if (pf->flags & I40E_FLAG_BASE_R_FEC) { + fec_cfg = (I40E_AQ_SET_FEC_REQUEST_KR | + I40E_AQ_SET_FEC_ABILITY_KR); + } + if (i40e_set_fec_cfg(dev, fec_cfg)) + dev_warn(&pf->pdev->dev, "Cannot change FEC config\n"); + } + if ((changed_flags & pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && (pf->flags & I40E_FLAG_MFP_ENABLED)) @@ -4948,6 +5170,8 @@ static const struct ethtool_ops i40e_ethtool_ops = { .set_per_queue_coalesce = i40e_set_per_queue_coalesce, .get_link_ksettings = i40e_get_link_ksettings, .set_link_ksettings = i40e_set_link_ksettings, + .get_fecparam = i40e_get_fec_param, + .set_fecparam = i40e_set_fec_param, }; void i40e_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0ee641c41be4..5c6731b97059 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -26,8 +26,8 @@ static const char i40e_driver_string[] = #define DRV_KERN "-k" #define DRV_VERSION_MAJOR 2 -#define DRV_VERSION_MINOR 7 -#define DRV_VERSION_BUILD 6 +#define DRV_VERSION_MINOR 8 +#define DRV_VERSION_BUILD 10 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -3610,7 +3610,7 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) (I40E_QUEUE_TYPE_TX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); - wr32(hw, I40E_QINT_TQCTL(nextqp), val); + wr32(hw, I40E_QINT_TQCTL(nextqp), val); } val = I40E_QINT_TQCTL_CAUSE_ENA_MASK | @@ -8131,8 +8131,8 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, i40e_service_event_schedule(pf); } else { i40e_pf_unquiesce_all_vsi(pf); - set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); - set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); + set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); + set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); } exit: @@ -11042,6 +11042,7 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) if (!(pf->flags & I40E_FLAG_RSS_ENABLED)) return 0; + queue_count = min_t(int, queue_count, num_online_cpus()); new_rss_size = min_t(int, queue_count, pf->rss_size_max); if (queue_count != vsi->num_queue_pairs) { @@ -13859,6 +13860,29 @@ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf) i40e_get_mac_addr(&pf->hw, pf->hw.mac.addr); } +/** + * i40e_set_fec_in_flags - helper function for setting FEC options in flags + * @fec_cfg: FEC option to set in flags + * @flags: ptr to flags in which we set FEC option + **/ +void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags) +{ + if (fec_cfg & I40E_AQ_SET_FEC_AUTO) + *flags |= I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC; + if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_RS) || + (fec_cfg & I40E_AQ_SET_FEC_ABILITY_RS)) { + *flags |= I40E_FLAG_RS_FEC; + *flags &= ~I40E_FLAG_BASE_R_FEC; + } + if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_KR) || + (fec_cfg & I40E_AQ_SET_FEC_ABILITY_KR)) { + *flags |= I40E_FLAG_BASE_R_FEC; + *flags &= ~I40E_FLAG_RS_FEC; + } + if (fec_cfg == 0) + *flags &= ~(I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC); +} + /** * i40e_probe - Device initialization routine * @pdev: PCI device information struct @@ -14350,6 +14374,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); pf->hw.phy.link_info.requested_speeds = abilities.link_speed; + /* set the FEC config due to the board capabilities */ + i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, &pf->flags); + /* get the supported phy types from the fw */ err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL); if (err) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 2ac23ebfbf31..831d52bc3c9a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2069,6 +2069,11 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) goto error_param; } + if (qci->num_queue_pairs > I40E_MAX_VF_QUEUES) { + aq_ret = I40E_ERR_PARAM; + goto error_param; + } + for (i = 0; i < qci->num_queue_pairs; i++) { qpi = &qci->qpair[i]; @@ -3384,8 +3389,8 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) dev_info(&pf->pdev->dev, "VF %d: Invalid input/s, can't apply cloud filter\n", vf->vf_id); - aq_ret = I40E_ERR_PARAM; - goto err; + aq_ret = I40E_ERR_PARAM; + goto err; } cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL); @@ -3656,7 +3661,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, int ret; pf->vf_aq_requests++; - if (local_vf_id >= pf->num_alloc_vfs) + if (local_vf_id < 0 || local_vf_id >= pf->num_alloc_vfs) return -EINVAL; vf = &(pf->vf[local_vf_id]); diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 870cf654e436..96d849460d9b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -9,69 +9,6 @@ #include "i40e_txrx_common.h" #include "i40e_xsk.h" -/** - * i40e_alloc_xsk_umems - Allocate an array to store per ring UMEMs - * @vsi: Current VSI - * - * Returns 0 on success, <0 on failure - **/ -static int i40e_alloc_xsk_umems(struct i40e_vsi *vsi) -{ - if (vsi->xsk_umems) - return 0; - - vsi->num_xsk_umems_used = 0; - vsi->num_xsk_umems = vsi->alloc_queue_pairs; - vsi->xsk_umems = kcalloc(vsi->num_xsk_umems, sizeof(*vsi->xsk_umems), - GFP_KERNEL); - if (!vsi->xsk_umems) { - vsi->num_xsk_umems = 0; - return -ENOMEM; - } - - return 0; -} - -/** - * i40e_add_xsk_umem - Store a UMEM for a certain ring/qid - * @vsi: Current VSI - * @umem: UMEM to store - * @qid: Ring/qid to associate with the UMEM - * - * Returns 0 on success, <0 on failure - **/ -static int i40e_add_xsk_umem(struct i40e_vsi *vsi, struct xdp_umem *umem, - u16 qid) -{ - int err; - - err = i40e_alloc_xsk_umems(vsi); - if (err) - return err; - - vsi->xsk_umems[qid] = umem; - vsi->num_xsk_umems_used++; - - return 0; -} - -/** - * i40e_remove_xsk_umem - Remove a UMEM for a certain ring/qid - * @vsi: Current VSI - * @qid: Ring/qid associated with the UMEM - **/ -static void i40e_remove_xsk_umem(struct i40e_vsi *vsi, u16 qid) -{ - vsi->xsk_umems[qid] = NULL; - vsi->num_xsk_umems_used--; - - if (vsi->num_xsk_umems == 0) { - kfree(vsi->xsk_umems); - vsi->xsk_umems = NULL; - vsi->num_xsk_umems = 0; - } -} - /** * i40e_xsk_umem_dma_map - DMA maps all UMEM memory for the netdev * @vsi: Current VSI @@ -140,6 +77,7 @@ static void i40e_xsk_umem_dma_unmap(struct i40e_vsi *vsi, struct xdp_umem *umem) static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, u16 qid) { + struct net_device *netdev = vsi->netdev; struct xdp_umem_fq_reuse *reuseq; bool if_running; int err; @@ -150,12 +88,9 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, if (qid >= vsi->num_queue_pairs) return -EINVAL; - if (vsi->xsk_umems) { - if (qid >= vsi->num_xsk_umems) - return -EINVAL; - if (vsi->xsk_umems[qid]) - return -EBUSY; - } + if (qid >= netdev->real_num_rx_queues || + qid >= netdev->real_num_tx_queues) + return -EINVAL; reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count); if (!reuseq) @@ -173,13 +108,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, err = i40e_queue_pair_disable(vsi, qid); if (err) return err; - } - err = i40e_add_xsk_umem(vsi, umem, qid); - if (err) - return err; - - if (if_running) { err = i40e_queue_pair_enable(vsi, qid); if (err) return err; @@ -197,11 +126,13 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, **/ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) { + struct net_device *netdev = vsi->netdev; + struct xdp_umem *umem; bool if_running; int err; - if (!vsi->xsk_umems || qid >= vsi->num_xsk_umems || - !vsi->xsk_umems[qid]) + umem = xdp_get_umem_from_qid(netdev, qid); + if (!umem) return -EINVAL; if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi); @@ -212,8 +143,7 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) return err; } - i40e_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]); - i40e_remove_xsk_umem(vsi, qid); + i40e_xsk_umem_dma_unmap(vsi, umem); if (if_running) { err = i40e_queue_pair_enable(vsi, qid); @@ -237,20 +167,18 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid) int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem, u16 qid) { + struct net_device *netdev = vsi->netdev; + struct xdp_umem *queried_umem; + if (vsi->type != I40E_VSI_MAIN) return -EINVAL; - if (qid >= vsi->num_queue_pairs) + queried_umem = xdp_get_umem_from_qid(netdev, qid); + + if (!queried_umem) return -EINVAL; - if (vsi->xsk_umems) { - if (qid >= vsi->num_xsk_umems) - return -EINVAL; - *umem = vsi->xsk_umems[qid]; - return 0; - } - - *umem = NULL; + *umem = queried_umem; return 0; } @@ -945,13 +873,11 @@ void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring) **/ bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi) { + struct net_device *netdev = vsi->netdev; int i; - if (!vsi->xsk_umems) - return false; - for (i = 0; i < vsi->num_queue_pairs; i++) { - if (vsi->xsk_umems[i]) + if (xdp_get_umem_from_qid(netdev, i)) return true; } diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index fe1592ae8769..ca54e268d157 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -515,7 +515,7 @@ struct igb_adapter { /* OS defined structs */ struct pci_dev *pdev; - struct mutex stats64_lock; + spinlock_t stats64_lock; struct rtnl_link_stats64 stats64; /* structs defined in e1000_hw.h */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 7426060b678f..c57671068245 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2295,7 +2295,7 @@ static void igb_get_ethtool_stats(struct net_device *netdev, int i, j; char *p; - mutex_lock(&adapter->stats64_lock); + spin_lock(&adapter->stats64_lock); igb_update_stats(adapter); for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) { @@ -2338,7 +2338,7 @@ static void igb_get_ethtool_stats(struct net_device *netdev, } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); i += IGB_RX_QUEUE_STATS_LEN; } - mutex_unlock(&adapter->stats64_lock); + spin_unlock(&adapter->stats64_lock); } static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 3615e2e52399..dfa357b1a9d6 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2203,9 +2203,9 @@ void igb_down(struct igb_adapter *adapter) del_timer_sync(&adapter->phy_info_timer); /* record the stats before reset*/ - mutex_lock(&adapter->stats64_lock); + spin_lock(&adapter->stats64_lock); igb_update_stats(adapter); - mutex_unlock(&adapter->stats64_lock); + spin_unlock(&adapter->stats64_lock); adapter->link_speed = 0; adapter->link_duplex = 0; @@ -3841,7 +3841,7 @@ static int igb_sw_init(struct igb_adapter *adapter) adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; spin_lock_init(&adapter->nfc_lock); - mutex_init(&adapter->stats64_lock); + spin_lock_init(&adapter->stats64_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { case e1000_82576: @@ -5407,9 +5407,9 @@ no_wait: } } - mutex_lock(&adapter->stats64_lock); + spin_lock(&adapter->stats64_lock); igb_update_stats(adapter); - mutex_unlock(&adapter->stats64_lock); + spin_unlock(&adapter->stats64_lock); for (i = 0; i < adapter->num_tx_queues; i++) { struct igb_ring *tx_ring = adapter->tx_ring[i]; @@ -6236,10 +6236,10 @@ static void igb_get_stats64(struct net_device *netdev, { struct igb_adapter *adapter = netdev_priv(netdev); - mutex_lock(&adapter->stats64_lock); + spin_lock(&adapter->stats64_lock); igb_update_stats(adapter); memcpy(stats, &adapter->stats64, sizeof(*stats)); - mutex_unlock(&adapter->stats64_lock); + spin_unlock(&adapter->stats64_lock); } /** diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig index f9149d2a4694..43656f961891 100644 --- a/drivers/net/ethernet/mediatek/Kconfig +++ b/drivers/net/ethernet/mediatek/Kconfig @@ -1,6 +1,6 @@ config NET_VENDOR_MEDIATEK bool "MediaTek ethernet driver" - depends on ARCH_MEDIATEK + depends on ARCH_MEDIATEK || SOC_MT7621 ---help--- If you have a Mediatek SoC with ethernet, say Y. diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 49f926b7a91c..94d4663e3933 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1745,6 +1745,22 @@ static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth) return IRQ_HANDLED; } +static irqreturn_t mtk_handle_irq(int irq, void *_eth) +{ + struct mtk_eth *eth = _eth; + + if (mtk_r32(eth, MTK_PDMA_INT_MASK) & MTK_RX_DONE_INT) { + if (mtk_r32(eth, MTK_PDMA_INT_STATUS) & MTK_RX_DONE_INT) + mtk_handle_irq_rx(irq, _eth); + } + if (mtk_r32(eth, MTK_QDMA_INT_MASK) & MTK_TX_DONE_INT) { + if (mtk_r32(eth, MTK_QMTK_INT_STATUS) & MTK_TX_DONE_INT) + mtk_handle_irq_tx(irq, _eth); + } + + return IRQ_HANDLED; +} + #ifdef CONFIG_NET_POLL_CONTROLLER static void mtk_poll_controller(struct net_device *dev) { @@ -2485,7 +2501,10 @@ static int mtk_probe(struct platform_device *pdev) } for (i = 0; i < 3; i++) { - eth->irq[i] = platform_get_irq(pdev, i); + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_INT) && i > 0) + eth->irq[i] = eth->irq[0]; + else + eth->irq[i] = platform_get_irq(pdev, i); if (eth->irq[i] < 0) { dev_err(&pdev->dev, "no IRQ%d resource found\n", i); return -ENXIO; @@ -2528,13 +2547,21 @@ static int mtk_probe(struct platform_device *pdev) goto err_deinit_hw; } - err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0, - dev_name(eth->dev), eth); - if (err) - goto err_free_dev; + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_INT)) { + err = devm_request_irq(eth->dev, eth->irq[0], + mtk_handle_irq, 0, + dev_name(eth->dev), eth); + } else { + err = devm_request_irq(eth->dev, eth->irq[1], + mtk_handle_irq_tx, 0, + dev_name(eth->dev), eth); + if (err) + goto err_free_dev; - err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0, - dev_name(eth->dev), eth); + err = devm_request_irq(eth->dev, eth->irq[2], + mtk_handle_irq_rx, 0, + dev_name(eth->dev), eth); + } if (err) goto err_free_dev; @@ -2607,6 +2634,12 @@ static const struct mtk_soc_data mt2701_data = { .required_pctl = true, }; +static const struct mtk_soc_data mt7621_data = { + .caps = MTK_SHARED_INT, + .required_clks = MT7621_CLKS_BITMAP, + .required_pctl = false, +}; + static const struct mtk_soc_data mt7622_data = { .caps = MTK_DUAL_GMAC_SHARED_SGMII | MTK_GMAC1_ESW | MTK_HWLRO, .required_clks = MT7622_CLKS_BITMAP, @@ -2621,6 +2654,7 @@ static const struct mtk_soc_data mt7623_data = { const struct of_device_id of_mtk_match[] = { { .compatible = "mediatek,mt2701-eth", .data = &mt2701_data}, + { .compatible = "mediatek,mt7621-eth", .data = &mt7621_data}, { .compatible = "mediatek,mt7622-eth", .data = &mt7622_data}, { .compatible = "mediatek,mt7623-eth", .data = &mt7623_data}, {}, diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 46819297fc3e..f7501997cea0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -363,6 +363,7 @@ #define ETHSYS_CHIPID4_7 0x4 #define MT7623_ETH 7623 #define MT7622_ETH 7622 +#define MT7621_ETH 7621 /* ethernet subsystem config register */ #define ETHSYS_SYSCFG0 0x14 @@ -488,6 +489,8 @@ enum mtk_clks_map { BIT(MTK_CLK_SGMII_CDR_FB) | \ BIT(MTK_CLK_SGMII_CK) | \ BIT(MTK_CLK_ETH2PLL)) +#define MT7621_CLKS_BITMAP (0) + enum mtk_dev_state { MTK_HW_INIT, MTK_RESETTING @@ -567,6 +570,7 @@ struct mtk_rx_ring { #define MTK_DUAL_GMAC_SHARED_SGMII (BIT(11) | MTK_GMAC1_SGMII | \ MTK_GMAC2_SGMII) #define MTK_HWLRO BIT(12) +#define MTK_SHARED_INT BIT(13) #define MTK_HAS_CAPS(caps, _x) (((caps) & (_x)) == (_x)) /* struct mtk_eth_data - This is the structure holding all differences diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index db909b6069b5..65f8a4b6ed0c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -306,14 +306,16 @@ static int mlx4_init_user_cqes(void *buf, int entries, int cqe_size) if (entries_per_copy < entries) { for (i = 0; i < entries / entries_per_copy; i++) { - err = copy_to_user(buf, init_ents, PAGE_SIZE); + err = copy_to_user((void __user *)buf, init_ents, PAGE_SIZE) ? + -EFAULT : 0; if (err) goto out; buf += PAGE_SIZE; } } else { - err = copy_to_user(buf, init_ents, entries * cqe_size); + err = copy_to_user((void __user *)buf, init_ents, entries * cqe_size) ? + -EFAULT : 0; } out: diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 4953c852c247..a5be27772b8e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -100,7 +100,7 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not) req_not << 31), eq->doorbell); /* We still want ordering, just not swabbing, so add a barrier */ - mb(); + wmb(); } static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor, @@ -558,6 +558,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT. srq_no=0x%x, eq 0x%x\n", __func__, be32_to_cpu(eqe->event.srq.srqn), eq->eqn); + /* fall through */ case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: if (mlx4_is_master(dev)) { /* forward only to slave owning the SRQ */ diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 7df728f1e5b5..6e501af0e532 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -2067,9 +2067,11 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, { struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; + u64 qword_field; u32 dword_field; - int err; + u16 word_field; u8 byte_field; + int err; static const u8 a0_dmfs_query_hw_steering[] = { [0] = MLX4_STEERING_DMFS_A0_DEFAULT, [1] = MLX4_STEERING_DMFS_A0_DYNAMIC, @@ -2097,19 +2099,32 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* QPC/EEC/CQC/EQC/RDMARC attributes */ - MLX4_GET(param->qpc_base, outbox, INIT_HCA_QPC_BASE_OFFSET); - MLX4_GET(param->log_num_qps, outbox, INIT_HCA_LOG_QP_OFFSET); - MLX4_GET(param->srqc_base, outbox, INIT_HCA_SRQC_BASE_OFFSET); - MLX4_GET(param->log_num_srqs, outbox, INIT_HCA_LOG_SRQ_OFFSET); - MLX4_GET(param->cqc_base, outbox, INIT_HCA_CQC_BASE_OFFSET); - MLX4_GET(param->log_num_cqs, outbox, INIT_HCA_LOG_CQ_OFFSET); - MLX4_GET(param->altc_base, outbox, INIT_HCA_ALTC_BASE_OFFSET); - MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET); - MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET); - MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET); - MLX4_GET(param->num_sys_eqs, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); - MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); - MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); + MLX4_GET(qword_field, outbox, INIT_HCA_QPC_BASE_OFFSET); + param->qpc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_QP_OFFSET); + param->log_num_qps = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_SRQC_BASE_OFFSET); + param->srqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_SRQ_OFFSET); + param->log_num_srqs = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_CQC_BASE_OFFSET); + param->cqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_CQ_OFFSET); + param->log_num_cqs = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_ALTC_BASE_OFFSET); + param->altc_base = qword_field; + MLX4_GET(qword_field, outbox, INIT_HCA_AUXC_BASE_OFFSET); + param->auxc_base = qword_field; + MLX4_GET(qword_field, outbox, INIT_HCA_EQC_BASE_OFFSET); + param->eqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_EQ_OFFSET); + param->log_num_eqs = byte_field & 0x1f; + MLX4_GET(word_field, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); + param->num_sys_eqs = word_field & 0xfff; + MLX4_GET(qword_field, outbox, INIT_HCA_RDMARC_BASE_OFFSET); + param->rdmarc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_RD_OFFSET); + param->log_rd_per_qp = byte_field & 0x7; MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { @@ -2128,22 +2143,21 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* steering attributes */ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); - MLX4_GET(param->log_mc_entry_sz, outbox, - INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); - MLX4_GET(param->log_mc_table_sz, outbox, - INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); - MLX4_GET(byte_field, outbox, - INIT_HCA_FS_A0_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); + param->log_mc_entry_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); + param->log_mc_table_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_FS_A0_OFFSET); param->dmfs_high_steer_mode = a0_dmfs_query_hw_steering[(byte_field >> 6) & 3]; } else { MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); - MLX4_GET(param->log_mc_entry_sz, outbox, - INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); - MLX4_GET(param->log_mc_hash_sz, outbox, - INIT_HCA_LOG_MC_HASH_SZ_OFFSET); - MLX4_GET(param->log_mc_table_sz, outbox, - INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + param->log_mc_entry_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); + param->log_mc_hash_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + param->log_mc_table_sz = byte_field & 0x1f; } /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ @@ -2167,15 +2181,18 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); - MLX4_GET(param->mw_enabled, outbox, INIT_HCA_TPT_MW_OFFSET); - MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_TPT_MW_OFFSET); + param->mw_enabled = byte_field >> 7; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); + param->log_mpt_sz = byte_field & 0x3f; MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET); MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET); /* UAR attributes */ MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET); - MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); + param->log_uar_sz = byte_field & 0xf; /* phv_check enable */ MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 6bb2a860b15b..9de9abacf7f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -22,7 +22,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ # mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ - en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o + en_selftest.o en/port.o en/monitor_stats.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 424457ff9759..8ecac81a385d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -258,6 +258,8 @@ const char *parse_fs_dst(struct trace_seq *p, return ret; } +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_ft); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_ft); EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg); EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg); EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index d027ce00c8ce..a4cf123e3f17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -61,6 +61,41 @@ const char *parse_fs_dst(struct trace_seq *p, const struct mlx5_flow_destination *dst, u32 counter_id); +TRACE_EVENT(mlx5_fs_add_ft, + TP_PROTO(const struct mlx5_flow_table *ft), + TP_ARGS(ft), + TP_STRUCT__entry( + __field(const struct mlx5_flow_table *, ft) + __field(u32, id) + __field(u32, level) + __field(u32, type) + ), + TP_fast_assign( + __entry->ft = ft; + __entry->id = ft->id; + __entry->level = ft->level; + __entry->type = ft->type; + ), + TP_printk("ft=%p id=%u level=%u type=%u \n", + __entry->ft, __entry->id, __entry->level, __entry->type) + ); + +TRACE_EVENT(mlx5_fs_del_ft, + TP_PROTO(const struct mlx5_flow_table *ft), + TP_ARGS(ft), + TP_STRUCT__entry( + __field(const struct mlx5_flow_table *, ft) + __field(u32, id) + ), + TP_fast_assign( + __entry->ft = ft; + __entry->id = ft->id; + + ), + TP_printk("ft=%p id=%u\n", + __entry->ft, __entry->id) + ); + TRACE_EVENT(mlx5_fs_add_fg, TP_PROTO(const struct mlx5_flow_group *fg), TP_ARGS(fg), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 27e276c9bf84..6dd74ef69389 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -76,15 +76,14 @@ struct page_pool; #define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +#define MLX5E_RX_MAX_HEAD (256) + #define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ #define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \ max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) -#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6) -#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8) -#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \ - (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \ - MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)) +#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \ + MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD)) #define MLX5_MPWRQ_LOG_WQE_SZ 18 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ @@ -119,8 +118,6 @@ struct page_pool; #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 -#define MLX5E_RX_MAX_HEAD (256) - #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_DEFAULT_LRO_TIMEOUT 32 #define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 @@ -309,16 +306,18 @@ struct mlx5e_cq { struct mlx5_core_cq mcq; struct mlx5e_channel *channel; + /* control */ + struct mlx5_core_dev *mdev; + struct mlx5_wq_ctrl wq_ctrl; +} ____cacheline_aligned_in_smp; + +struct mlx5e_cq_decomp { /* cqe decompression */ struct mlx5_cqe64 title; struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE]; u8 mini_arr_idx; - u16 decmprs_left; - u16 decmprs_wqe_counter; - - /* control */ - struct mlx5_core_dev *mdev; - struct mlx5_wq_ctrl wq_ctrl; + u16 left; + u16 wqe_counter; } ____cacheline_aligned_in_smp; struct mlx5e_tx_wqe_info { @@ -388,7 +387,10 @@ struct mlx5e_txqsq { struct mlx5e_channel *channel; int txq_ix; u32 rate_limit; - struct work_struct recover_work; + struct mlx5e_txqsq_recover { + struct work_struct recover_work; + u64 last_recover; + } recover; } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { @@ -578,6 +580,7 @@ struct mlx5e_rq { struct net_device *netdev; struct mlx5e_rq_stats *stats; struct mlx5e_cq cq; + struct mlx5e_cq_decomp cqd; struct mlx5e_page_cache page_cache; struct hwtstamp_config *tstamp; struct mlx5_clock *clock; @@ -635,6 +638,7 @@ struct mlx5e_channel { struct hwtstamp_config *tstamp; int ix; int cpu; + cpumask_var_t xps_cpumask; }; struct mlx5e_channels { @@ -679,13 +683,6 @@ struct mlx5e_rss_params { u8 hfunc; }; -struct mlx5e_modify_sq_param { - int curr_state; - int next_state; - int rl_update; - int rl_index; -}; - struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -741,7 +738,6 @@ struct mlx5e_priv { #ifdef CONFIG_MLX5_EN_TLS struct mlx5e_tls *tls; #endif - struct devlink_health_reporter *tx_reporter; }; struct mlx5e_profile { @@ -808,6 +804,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, void mlx5e_update_stats(struct mlx5e_priv *priv); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); +void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); void mlx5e_init_l2_addr(struct mlx5e_priv *priv); int mlx5e_self_test_num(struct mlx5e_priv *priv); @@ -871,11 +868,6 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); -int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, - struct mlx5e_modify_sq_param *p); -void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); -void mlx5e_tx_disable_queue(struct netdev_queue *txq); - static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) { return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h deleted file mode 100644 index 2335c5b48820..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2018 Mellanox Technologies. */ - -#ifndef __MLX5E_EN_REPORTER_H -#define __MLX5E_EN_REPORTER_H - -#include -#include "en.h" - -int mlx5e_tx_reporter_create(struct mlx5e_priv *priv); -void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv); -void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq); -void mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq); - -#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c deleted file mode 100644 index d9675afbb924..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ /dev/null @@ -1,356 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2018 Mellanox Technologies. */ - -#include -#include "reporter.h" -#include "lib/eq.h" - -#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256 - -struct mlx5e_tx_err_ctx { - int (*recover)(struct mlx5e_txqsq *sq); - struct mlx5e_txqsq *sq; -}; - -static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) -{ - unsigned long exp_time = jiffies + msecs_to_jiffies(2000); - - while (time_before(jiffies, exp_time)) { - if (sq->cc == sq->pc) - return 0; - - msleep(20); - } - - netdev_err(sq->channel->netdev, - "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", - sq->sqn, sq->cc, sq->pc); - - return -ETIMEDOUT; -} - -static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) -{ - WARN_ONCE(sq->cc != sq->pc, - "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", - sq->sqn, sq->cc, sq->pc); - sq->cc = 0; - sq->dma_fifo_cc = 0; - sq->pc = 0; -} - -static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) -{ - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - struct mlx5e_modify_sq_param msp = {0}; - int err; - - msp.curr_state = curr_state; - msp.next_state = MLX5_SQC_STATE_RST; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); - return err; - } - - memset(&msp, 0, sizeof(msp)); - msp.curr_state = MLX5_SQC_STATE_RST; - msp.next_state = MLX5_SQC_STATE_RDY; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); - return err; - } - - return 0; -} - -static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) -{ - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - u8 state; - int err; - - if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) - return 0; - - err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); - if (err) { - netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", - sq->sqn, err); - return err; - } - - if (state != MLX5_RQC_STATE_ERR) { - netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); - return -EINVAL; - } - - mlx5e_tx_disable_queue(sq->txq); - - err = mlx5e_wait_for_sq_flush(sq); - if (err) - return err; - - /* At this point, no new packets will arrive from the stack as TXQ is - * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all - * pending WQEs. SQ can safely reset the SQ. - */ - - err = mlx5e_sq_to_ready(sq, state); - if (err) - return err; - - mlx5e_reset_txqsq_cc_pc(sq); - sq->stats->recover++; - mlx5e_activate_txqsq(sq); - - return 0; -} - -void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq) -{ - char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; - struct mlx5e_tx_err_ctx err_ctx = {0}; - - err_ctx.sq = sq; - err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; - sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn); - - devlink_health_report(sq->channel->priv->tx_reporter, err_str, - &err_ctx); -} - -static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq) -{ - struct mlx5_eq_comp *eq = sq->cq.mcq.eq; - u32 eqe_count; - - netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", - eq->core.eqn, eq->core.cons_index, eq->core.irqn); - - eqe_count = mlx5_eq_poll_irq_disabled(eq); - if (!eqe_count) { - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - return 1; - } - - netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n", - eqe_count, eq->core.eqn); - sq->channel->stats->eq_rearm++; - return 0; -} - -void mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq) -{ - struct mlx5e_tx_err_ctx err_ctx; - char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; - - err_ctx.sq = sq; - err_ctx.recover = mlx5e_tx_reporter_timeout_recover; - sprintf(err_str, - "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", - sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, - jiffies_to_usecs(jiffies - sq->txq->trans_start)); - devlink_health_report(sq->channel->priv->tx_reporter, err_str, - &err_ctx); -} - -/* state lock cannot be grabbed within this function. - * It can cause a dead lock or a read-after-free. - */ -int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx) -{ - return err_ctx->recover(err_ctx->sq); -} - -static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) -{ - int err; - - mutex_lock(&priv->state_lock); - mlx5e_close_locked(priv->netdev); - err = mlx5e_open_locked(priv->netdev); - mutex_unlock(&priv->state_lock); - - return err; -} - -static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, - void *context) -{ - struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - struct mlx5e_tx_err_ctx *err_ctx = context; - - return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) : - mlx5e_tx_reporter_recover_all(priv); -} - -static int -mlx5e_tx_reporter_build_diagnose_output(struct devlink_health_buffer *buffer, - u32 sqn, u8 state, u8 stopped) -{ - int err, i; - int nest = 0; - char name[20]; - - err = devlink_health_buffer_nest_start(buffer, - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT); - if (err) - goto buffer_error; - nest++; - - err = devlink_health_buffer_nest_start(buffer, - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR); - if (err) - goto buffer_error; - nest++; - - sprintf(name, "SQ 0x%x", sqn); - err = devlink_health_buffer_put_object_name(buffer, name); - if (err) - goto buffer_error; - - err = devlink_health_buffer_nest_start(buffer, - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE); - if (err) - goto buffer_error; - nest++; - - err = devlink_health_buffer_nest_start(buffer, - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT); - if (err) - goto buffer_error; - nest++; - - err = devlink_health_buffer_nest_start(buffer, - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR); - if (err) - goto buffer_error; - nest++; - - err = devlink_health_buffer_put_object_name(buffer, "HW state"); - if (err) - goto buffer_error; - - err = devlink_health_buffer_nest_start(buffer, - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE); - if (err) - goto buffer_error; - nest++; - - err = devlink_health_buffer_put_value_u8(buffer, state); - if (err) - goto buffer_error; - - devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE */ - nest--; - - devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR */ - nest--; - - err = devlink_health_buffer_nest_start(buffer, - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR); - if (err) - goto buffer_error; - nest++; - - err = devlink_health_buffer_put_object_name(buffer, "stopped"); - if (err) - goto buffer_error; - - err = devlink_health_buffer_nest_start(buffer, - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE); - if (err) - goto buffer_error; - nest++; - - err = devlink_health_buffer_put_value_u8(buffer, stopped); - if (err) - goto buffer_error; - - for (i = 0; i < nest; i++) - devlink_health_buffer_nest_end(buffer); - - return 0; - -buffer_error: - for (i = 0; i < nest; i++) - devlink_health_buffer_nest_cancel(buffer); - return err; -} - -static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_health_buffer **buffers_array, - unsigned int buffer_size, - unsigned int num_buffers) -{ - struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - unsigned int buff = 0; - int i = 0, err = 0; - - if (buffer_size < MLX5E_TX_REPORTER_PER_SQ_MAX_LEN) - return -ENOMEM; - - mutex_lock(&priv->state_lock); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - mutex_unlock(&priv->state_lock); - return 0; - } - - while (i < priv->channels.num * priv->channels.params.num_tc) { - struct mlx5e_txqsq *sq = priv->txq2sq[i]; - u8 state; - - err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); - if (err) - break; - - err = mlx5e_tx_reporter_build_diagnose_output(buffers_array[buff], - sq->sqn, state, - netif_xmit_stopped(sq->txq)); - if (err) { - if (++buff == num_buffers) - break; - } else { - i++; - } - } - - mutex_unlock(&priv->state_lock); - return err; -} - -static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { - .name = "TX", - .recover = mlx5e_tx_reporter_recover, - .diagnose_size = MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC * - MLX5E_TX_REPORTER_PER_SQ_MAX_LEN, - .diagnose = mlx5e_tx_reporter_diagnose, - .dump_size = 0, - .dump = NULL, -}; - -#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 -int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct devlink *devlink = priv_to_devlink(mdev); - - priv->tx_reporter = - devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, - MLX5_REPORTER_TX_GRACEFUL_PERIOD, - true, priv); - return PTR_ERR_OR_ZERO(priv->tx_reporter); -} - -void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) -{ - devlink_health_reporter_destroy(priv->tx_reporter); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c9df08133718..3bbccead2f63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -844,9 +844,12 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Autoneg); - if (get_fec_supported_advertised(mdev, link_ksettings)) + err = get_fec_supported_advertised(mdev, link_ksettings); + if (err) { netdev_dbg(priv->netdev, "%s: FEC caps query failed: %d\n", __func__, err); + err = 0; /* don't fail caps query because of FEC error */ + } if (!an_disable_admin) ethtool_link_ksettings_add_link_mode(link_ksettings, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index dee0c8f3d4e9..099d307e6f25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -51,7 +51,6 @@ #include "en/xdp.h" #include "lib/eq.h" #include "en/monitor_stats.h" -#include "en/reporter.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -172,8 +171,7 @@ static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) return order_base_2(mlx5e_rx_get_linear_frag_sz(params)); - return MLX5E_MPWQE_STRIDE_SZ(mdev, - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); + return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); } static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, @@ -951,7 +949,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (params->rx_dim_enabled) __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - if (params->pflags & MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE)) __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); return 0; @@ -1161,7 +1159,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) return 0; } -static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); +static void mlx5e_sq_recover(struct work_struct *work); static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int txq_ix, struct mlx5e_params *params, @@ -1183,7 +1181,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; - INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); + INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) @@ -1271,8 +1269,15 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, return err; } -int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, - struct mlx5e_modify_sq_param *p) +struct mlx5e_modify_sq_param { + int curr_state; + int next_state; + bool rl_update; + int rl_index; +}; + +static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p) { void *in; void *sqc; @@ -1370,7 +1375,17 @@ err_free_txqsq: return err; } -void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) +static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) +{ + WARN_ONCE(sq->cc != sq->pc, + "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", + sq->sqn, sq->cc, sq->pc); + sq->cc = 0; + sq->dma_fifo_cc = 0; + sq->pc = 0; +} + +static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); @@ -1379,7 +1394,7 @@ void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) netif_tx_start_queue(sq->txq); } -void mlx5e_tx_disable_queue(struct netdev_queue *txq) +static inline void netif_tx_disable_queue(struct netdev_queue *txq) { __netif_tx_lock_bh(txq); netif_tx_stop_queue(txq); @@ -1395,7 +1410,7 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) /* prevent netif_tx_wake_queue */ napi_synchronize(&c->napi); - mlx5e_tx_disable_queue(sq->txq); + netif_tx_disable_queue(sq->txq); /* last doorbell out, godspeed .. */ if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { @@ -1415,7 +1430,6 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) struct mlx5_rate_limit rl = {0}; cancel_work_sync(&sq->dim.work); - cancel_work_sync(&sq->recover_work); mlx5e_destroy_sq(mdev, sq->sqn); if (sq->rate_limit) { rl.rate = sq->rate_limit; @@ -1425,15 +1439,105 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) mlx5e_free_txqsq(sq); } -static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) +static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) { - struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, - recover_work); + unsigned long exp_time = jiffies + msecs_to_jiffies(2000); - if (!sq->channel->priv->tx_reporter) + while (time_before(jiffies, exp_time)) { + if (sq->cc == sq->pc) + return 0; + + msleep(20); + } + + netdev_err(sq->channel->netdev, + "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", + sq->sqn, sq->cc, sq->pc); + + return -ETIMEDOUT; +} + +static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + struct mlx5e_modify_sq_param msp = {0}; + int err; + + msp.curr_state = curr_state; + msp.next_state = MLX5_SQC_STATE_RST; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); + return err; + } + + memset(&msp, 0, sizeof(msp)); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); + return err; + } + + return 0; +} + +static void mlx5e_sq_recover(struct work_struct *work) +{ + struct mlx5e_txqsq_recover *recover = + container_of(work, struct mlx5e_txqsq_recover, + recover_work); + struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq, + recover); + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + u8 state; + int err; + + err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", + sq->sqn, err); + return; + } + + if (state != MLX5_RQC_STATE_ERR) { + netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); + return; + } + + netif_tx_disable_queue(sq->txq); + + if (mlx5e_wait_for_sq_flush(sq)) return; - mlx5e_tx_reporter_err_cqe(sq); + /* If the interval between two consecutive recovers per SQ is too + * short, don't recover to avoid infinite loop of ERR_CQE -> recover. + * If we reached this state, there is probably a bug that needs to be + * fixed. let's keep the queue close and let tx timeout cleanup. + */ + if (jiffies_to_msecs(jiffies - recover->last_recover) < + MLX5E_SQ_RECOVER_MIN_INTERVAL) { + netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n", + sq->sqn); + return; + } + + /* At this point, no new packets will arrive from the stack as TXQ is + * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all + * pending WQEs. SQ can safely reset the SQ. + */ + if (mlx5e_sq_to_ready(sq, state)) + return; + + mlx5e_reset_txqsq_cc_pc(sq); + sq->stats->recover++; + recover->last_recover = jiffies; + mlx5e_activate_txqsq(sq); } static int mlx5e_open_icosq(struct mlx5e_channel *c, @@ -1845,6 +1949,29 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +static int mlx5e_alloc_xps_cpumask(struct mlx5e_channel *c, + struct mlx5e_params *params) +{ + int num_comp_vectors = mlx5_comp_vectors_count(c->mdev); + int irq; + + if (!zalloc_cpumask_var(&c->xps_cpumask, GFP_KERNEL)) + return -ENOMEM; + + for (irq = c->ix; irq < num_comp_vectors; irq += params->num_channels) { + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(c->mdev, irq)); + + cpumask_set_cpu(cpu, c->xps_cpumask); + } + + return 0; +} + +static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c) +{ + free_cpumask_var(c->xps_cpumask); +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct mlx5e_channel_param *cparam, @@ -1877,9 +2004,12 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; - c->irq_desc = irq_to_desc(irq); + err = mlx5e_alloc_xps_cpumask(c, params); + if (err) + goto err_free_channel; + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); @@ -1962,6 +2092,9 @@ err_close_icosq_cq: err_napi_del: netif_napi_del(&c->napi); + mlx5e_free_xps_cpumask(c); + +err_free_channel: kvfree(c); return err; @@ -1974,7 +2107,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); - netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); + netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) @@ -2002,6 +2135,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) mlx5e_close_tx_cqs(c); mlx5e_close_cq(&c->icosq.cq); netif_napi_del(&c->napi); + mlx5e_free_xps_cpumask(c); kvfree(c); } @@ -3102,7 +3236,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { int tc; - mlx5e_tx_reporter_destroy(priv); for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); } @@ -3388,11 +3521,32 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, } } +void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) +{ + int i; + + for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) { + struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; + struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; + int j; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + + for (j = 0; j < priv->max_opened_tc; j++) { + struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_dropped += sq_stats->dropped; + } + } +} + void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_sw_stats *sstats = &priv->stats.sw; struct mlx5e_vport_stats *vstats = &priv->stats.vport; struct mlx5e_pport_stats *pstats = &priv->stats.pport; @@ -3407,12 +3561,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); } else { - mlx5e_grp_sw_update_stats(priv); - stats->rx_packets = sstats->rx_packets; - stats->rx_bytes = sstats->rx_bytes; - stats->tx_packets = sstats->tx_packets; - stats->tx_bytes = sstats->tx_bytes; - stats->tx_dropped = sstats->tx_queue_dropped; + mlx5e_fold_sw_stats64(priv, stats); } stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer; @@ -4074,14 +4223,31 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } +static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, + struct mlx5e_txqsq *sq) +{ + struct mlx5_eq_comp *eq = sq->cq.mcq.eq; + u32 eqe_count; + + netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eq->core.eqn, eq->core.cons_index, eq->core.irqn); + + eqe_count = mlx5_eq_poll_irq_disabled(eq); + if (!eqe_count) + return false; + + netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn); + sq->channel->stats->eq_rearm++; + return true; +} + static void mlx5e_tx_timeout_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, tx_timeout_work); - int i; - - if (!priv->tx_reporter) - return; + struct net_device *dev = priv->netdev; + bool reopen_channels = false; + int i, err; rtnl_lock(); mutex_lock(&priv->state_lock); @@ -4090,16 +4256,36 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) goto unlock; for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { - struct netdev_queue *dev_queue = - netdev_get_tx_queue(priv->netdev, i); + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); struct mlx5e_txqsq *sq = priv->txq2sq[i]; if (!netif_xmit_stopped(dev_queue)) continue; - mlx5e_tx_reporter_timeout(sq); + netdev_err(dev, + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", + i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - dev_queue->trans_start)); + + /* If we recover a lost interrupt, most likely TX timeout will + * be resolved, skip reopening channels + */ + if (!mlx5e_tx_timeout_eq_recover(dev, sq)) { + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + reopen_channels = true; + } } + if (!reopen_channels) + goto unlock; + + mlx5e_close_locked(dev); + err = mlx5e_open_locked(dev); + if (err) + netdev_err(priv->netdev, + "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + err); + unlock: mutex_unlock(&priv->state_lock); rtnl_unlock(); @@ -4767,7 +4953,6 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_initialize(priv); #endif - mlx5e_tx_reporter_create(priv); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 96cc0c6a4014..5d2e0c2f6624 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -58,7 +58,8 @@ struct mlx5e_rep_indr_block_priv { struct list_head list; }; -static void mlx5e_rep_indr_unregister_block(struct net_device *netdev); +static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev); static void mlx5e_rep_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) @@ -161,26 +162,16 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) { struct mlx5e_sw_stats *s = &priv->stats.sw; - struct mlx5e_rq_stats *rq_stats; - struct mlx5e_sq_stats *sq_stats; - int i, j; + struct rtnl_link_stats64 stats64 = {}; memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->channels.num; i++) { - struct mlx5e_channel *c = priv->channels.c[i]; + mlx5e_fold_sw_stats64(priv, &stats64); - rq_stats = c->rq.stats; - - s->rx_packets += rq_stats->packets; - s->rx_bytes += rq_stats->bytes; - - for (j = 0; j < priv->channels.params.num_tc; j++) { - sq_stats = c->sq[j].stats; - - s->tx_packets += sq_stats->packets; - s->tx_bytes += sq_stats->bytes; - } - } + s->rx_packets = stats64.rx_packets; + s->rx_bytes = stats64.rx_bytes; + s->tx_packets = stats64.tx_packets; + s->tx_bytes = stats64.tx_bytes; + s->tx_queue_dropped = stats64.tx_dropped; } static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, @@ -193,8 +184,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, return; mutex_lock(&priv->state_lock); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_rep_update_sw_counters(priv); + mlx5e_rep_update_sw_counters(priv); mlx5e_rep_update_hw_counters(priv); mutex_unlock(&priv->state_lock); @@ -663,7 +653,7 @@ static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list; list_for_each_entry_safe(cb_priv, temp, head, list) { - mlx5e_rep_indr_unregister_block(cb_priv->netdev); + mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev); kfree(cb_priv); } } @@ -735,7 +725,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, err = tcf_block_cb_register(f->block, mlx5e_rep_indr_setup_block_cb, - netdev, indr_priv, f->extack); + indr_priv, indr_priv, f->extack); if (err) { list_del(&indr_priv->list); kfree(indr_priv); @@ -743,14 +733,15 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, return err; case TC_BLOCK_UNBIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + if (!indr_priv) + return -ENOENT; + tcf_block_cb_unregister(f->block, mlx5e_rep_indr_setup_block_cb, - netdev); - indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); - if (indr_priv) { - list_del(&indr_priv->list); - kfree(indr_priv); - } + indr_priv); + list_del(&indr_priv->list); + kfree(indr_priv); return 0; default: @@ -779,7 +770,7 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, err = __tc_indr_block_cb_register(netdev, rpriv, mlx5e_rep_indr_setup_tc_cb, - netdev); + rpriv); if (err) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); @@ -789,10 +780,11 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, return err; } -static void mlx5e_rep_indr_unregister_block(struct net_device *netdev) +static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) { __tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, - netdev); + rpriv); } static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, @@ -811,7 +803,7 @@ static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, mlx5e_rep_indr_register_block(rpriv, netdev); break; case NETDEV_UNREGISTER: - mlx5e_rep_indr_unregister_block(netdev); + mlx5e_rep_indr_unregister_block(rpriv, netdev); break; } return NOTIFY_OK; @@ -1122,9 +1114,17 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; - int ret; + int ret, pf_num; + + ret = mlx5_lag_get_pf_num(priv->mdev, &pf_num); + if (ret) + return ret; + + if (rep->vport == FDB_UPLINK_VPORT) + ret = snprintf(buf, len, "p%d", pf_num); + else + ret = snprintf(buf, len, "pf%dvf%d", pf_num, rep->vport - 1); - ret = snprintf(buf, len, "%d", rep->vport - 1); if (ret >= len) return -EOPNOTSUPP; @@ -1225,17 +1225,8 @@ mlx5e_get_sw_stats64(const struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_sw_stats *sstats = &priv->stats.sw; - - mlx5e_rep_update_sw_counters(priv); - - stats->rx_packets = sstats->rx_packets; - stats->rx_bytes = sstats->rx_bytes; - stats->tx_packets = sstats->tx_packets; - stats->tx_bytes = sstats->tx_bytes; - - stats->tx_dropped = sstats->tx_queue_dropped; + mlx5e_fold_sw_stats64(priv, stats); return 0; } @@ -1281,6 +1272,18 @@ static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr) return 0; } +static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) +{ + netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n"); + + if (vlan != 0) + return -EOPNOTSUPP; + + /* allow setting 0-vid for compatibility with libvirt */ + return 0; +} + static const struct switchdev_ops mlx5e_rep_switchdev_ops = { .switchdev_port_attr_get = mlx5e_attr_get, }; @@ -1315,6 +1318,7 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_set_vf_rate = mlx5e_set_vf_rate, .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, }; bool mlx5e_eswitch_rep(struct net_device *netdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 1d0bb5ff8c26..be396e5e4e39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -52,40 +52,45 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) return config->rx_filter == HWTSTAMP_FILTER_ALL; } -static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc, - void *data) +static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq, + u32 cqcc, void *data) { - u32 ci = mlx5_cqwq_ctr2ix(&cq->wq, cqcc); + u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc); - memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64)); + memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64)); } static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, u32 cqcc) + struct mlx5_cqwq *wq, + u32 cqcc) { - mlx5e_read_cqe_slot(cq, cqcc, &cq->title); - cq->decmprs_left = be32_to_cpu(cq->title.byte_cnt); - cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter); + struct mlx5e_cq_decomp *cqd = &rq->cqd; + struct mlx5_cqe64 *title = &cqd->title; + + mlx5e_read_cqe_slot(wq, cqcc, title); + cqd->left = be32_to_cpu(title->byte_cnt); + cqd->wqe_counter = be16_to_cpu(title->wqe_counter); rq->stats->cqe_compress_blks++; } -static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc) +static inline void mlx5e_read_mini_arr_slot(struct mlx5_cqwq *wq, + struct mlx5e_cq_decomp *cqd, + u32 cqcc) { - mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr); - cq->mini_arr_idx = 0; + mlx5e_read_cqe_slot(wq, cqcc, cqd->mini_arr); + cqd->mini_arr_idx = 0; } -static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) +static inline void mlx5e_cqes_update_owner(struct mlx5_cqwq *wq, int n) { - struct mlx5_cqwq *wq = &cq->wq; - + u32 cqcc = wq->cc; u8 op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1; u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc); u32 wq_sz = mlx5_cqwq_get_size(wq); u32 ci_top = min_t(u32, wq_sz, ci + n); for (; ci < ci_top; ci++, n--) { - struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); cqe->op_own = op_own; } @@ -93,7 +98,7 @@ static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) if (unlikely(ci == wq_sz)) { op_own = !op_own; for (ci = 0; ci < n; ci++) { - struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); cqe->op_own = op_own; } @@ -101,68 +106,79 @@ static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) } static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, u32 cqcc) + struct mlx5_cqwq *wq, + u32 cqcc) { - cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt; - cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum; - cq->title.op_own &= 0xf0; - cq->title.op_own |= 0x01 & (cqcc >> cq->wq.fbc.log_sz); - cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter); + struct mlx5e_cq_decomp *cqd = &rq->cqd; + struct mlx5_mini_cqe8 *mini_cqe = &cqd->mini_arr[cqd->mini_arr_idx]; + struct mlx5_cqe64 *title = &cqd->title; + + title->byte_cnt = mini_cqe->byte_cnt; + title->check_sum = mini_cqe->checksum; + title->op_own &= 0xf0; + title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz); + title->wqe_counter = cpu_to_be16(cqd->wqe_counter); if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - cq->decmprs_wqe_counter += - mpwrq_get_cqe_consumed_strides(&cq->title); + cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title); else - cq->decmprs_wqe_counter = - mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cq->decmprs_wqe_counter + 1); + cqd->wqe_counter = + mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cqd->wqe_counter + 1); } static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, u32 cqcc) + struct mlx5_cqwq *wq, + u32 cqcc) { - mlx5e_decompress_cqe(rq, cq, cqcc); - cq->title.rss_hash_type = 0; - cq->title.rss_hash_result = 0; + struct mlx5e_cq_decomp *cqd = &rq->cqd; + + mlx5e_decompress_cqe(rq, wq, cqcc); + cqd->title.rss_hash_type = 0; + cqd->title.rss_hash_result = 0; } static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, + struct mlx5_cqwq *wq, int update_owner_only, int budget_rem) { - u32 cqcc = cq->wq.cc + update_owner_only; + struct mlx5e_cq_decomp *cqd = &rq->cqd; + u32 cqcc = wq->cc + update_owner_only; u32 cqe_count; u32 i; - cqe_count = min_t(u32, cq->decmprs_left, budget_rem); + cqe_count = min_t(u32, cqd->left, budget_rem); for (i = update_owner_only; i < cqe_count; - i++, cq->mini_arr_idx++, cqcc++) { - if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE) - mlx5e_read_mini_arr_slot(cq, cqcc); + i++, cqd->mini_arr_idx++, cqcc++) { + if (cqd->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE) + mlx5e_read_mini_arr_slot(wq, cqd, cqcc); - mlx5e_decompress_cqe_no_hash(rq, cq, cqcc); - rq->handle_rx_cqe(rq, &cq->title); + mlx5e_decompress_cqe_no_hash(rq, wq, cqcc); + rq->handle_rx_cqe(rq, &cqd->title); } - mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc); - cq->wq.cc = cqcc; - cq->decmprs_left -= cqe_count; + mlx5e_cqes_update_owner(wq, cqcc - wq->cc); + wq->cc = cqcc; + cqd->left -= cqe_count; rq->stats->cqe_compress_pkts += cqe_count; return cqe_count; } static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, + struct mlx5_cqwq *wq, int budget_rem) { - mlx5e_read_title_slot(rq, cq, cq->wq.cc); - mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1); - mlx5e_decompress_cqe(rq, cq, cq->wq.cc); - rq->handle_rx_cqe(rq, &cq->title); - cq->mini_arr_idx++; + struct mlx5e_cq_decomp *cqd = &rq->cqd; + u32 cc = wq->cc; - return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; + mlx5e_read_title_slot(rq, wq, cc); + mlx5e_read_mini_arr_slot(wq, cqd, cc + 1); + mlx5e_decompress_cqe(rq, wq, cc); + rq->handle_rx_cqe(rq, &cqd->title); + cqd->mini_arr_idx++; + + return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1; } static inline bool mlx5e_page_is_reserved(struct page *page) @@ -369,7 +385,7 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, static inline void mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, struct mlx5e_dma_info *dma_info, - int offset_from, int offset_to, u32 headlen) + int offset_from, u32 headlen) { const void *from = page_address(dma_info->page) + offset_from; /* Aligning len to sizeof(long) optimizes memcpy performance */ @@ -377,24 +393,7 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len, DMA_FROM_DEVICE); - skb_copy_to_linear_data_offset(skb, offset_to, from, len); -} - -static inline void -mlx5e_copy_skb_header_mpwqe(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_dma_info *dma_info, - u32 offset, u32 headlen) -{ - u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); - - mlx5e_copy_skb_header(pdev, skb, dma_info, offset, 0, headlen_pg); - - if (unlikely(offset + headlen > PAGE_SIZE)) { - dma_info++; - mlx5e_copy_skb_header(pdev, skb, dma_info, 0, headlen_pg, - headlen - headlen_pg); - } + skb_copy_to_linear_data(skb, from, len); } static void @@ -732,6 +731,8 @@ static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) ((struct ipv6hdr *)ip_p)->nexthdr; } +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + static inline void mlx5e_handle_csum(struct net_device *netdev, struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, @@ -754,6 +755,17 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))) goto csum_unnecessary; + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to be + * CHECKSUM_UNNECESSARY even if they are not padded. + */ + if (short_frame(skb->len)) + goto csum_unnecessary; + if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP)) goto csum_unnecessary; @@ -960,8 +972,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, } /* copy header */ - mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, - 0, headlen); + mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -1083,8 +1094,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w di++; } /* copy header */ - mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, head_di, - head_offset, headlen); + mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -1190,16 +1200,17 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); + struct mlx5_cqwq *cqwq = &cq->wq; struct mlx5_cqe64 *cqe; int work_done = 0; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return 0; - if (cq->decmprs_left) - work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); + if (rq->cqd.left) + work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget); - cqe = mlx5_cqwq_get_cqe(&cq->wq); + cqe = mlx5_cqwq_get_cqe(cqwq); if (!cqe) { if (unlikely(work_done)) goto out; @@ -1209,21 +1220,21 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) do { if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { work_done += - mlx5e_decompress_cqes_start(rq, cq, + mlx5e_decompress_cqes_start(rq, cqwq, budget - work_done); continue; } - mlx5_cqwq_pop(&cq->wq); + mlx5_cqwq_pop(cqwq); rq->handle_rx_cqe(rq, cqe); - } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); out: if (rq->xdp_prog) mlx5e_xdp_rx_poll_complete(rq); - mlx5_cqwq_update_db_record(&cq->wq); + mlx5_cqwq_update_db_record(cqwq); /* ensure cq space is freed before enabling more cqes */ wmb(); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index d3fe48ff9da9..1a78e05cbba8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -127,9 +127,9 @@ static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } -void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) +static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) { - struct mlx5e_sw_stats temp, *s = &temp; + struct mlx5e_sw_stats *s = &priv->stats.sw; int i; memset(s, 0, sizeof(*s)); @@ -212,8 +212,6 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_cqes += sq_stats->cqes; } } - - memcpy(&priv->stats.sw, s, sizeof(*s)); } static const struct counter_desc q_stats_desc[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index fe91ec06e3c7..4640d4f986f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -277,7 +277,6 @@ struct mlx5e_stats_grp { extern const struct mlx5e_stats_grp mlx5e_stats_grps[]; extern const int mlx5e_num_stats_grps; -void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv); void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv); #endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cae6c6d48984..74159d39dd66 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2767,14 +2767,13 @@ err_free: return err; } -static int +static struct mlx5e_tc_flow * __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, u16 flow_flags, struct net_device *filter_dev, struct mlx5_eswitch_rep *in_rep, - struct mlx5_core_dev *in_mdev, - struct mlx5e_tc_flow **__flow) + struct mlx5_core_dev *in_mdev) { struct netlink_ext_ack *extack = f->common.extack; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -2814,15 +2813,13 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; - *__flow = flow; - - return 0; + return flow; err_free: kfree(flow); kvfree(parse_attr); out: - return err; + return ERR_PTR(err); } static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, @@ -2855,11 +2852,13 @@ static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, in_mdev = priv->mdev; parse_attr = flow->esw_attr->parse_attr; - err = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags, - parse_attr->filter_dev, - flow->esw_attr->in_rep, in_mdev, &peer_flow); - if (err) + peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags, + parse_attr->filter_dev, + flow->esw_attr->in_rep, in_mdev); + if (IS_ERR(peer_flow)) { + err = PTR_ERR(peer_flow); goto out; + } flow->peer_flow = peer_flow; flow->flags |= MLX5E_TC_FLOW_DUP; @@ -2885,10 +2884,10 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow; int err; - err = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, - in_mdev, &flow); - if (err) - goto out; + flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, + in_mdev); + if (IS_ERR(flow)) + return PTR_ERR(flow); if (is_peer_flow_needed(flow)) { err = mlx5e_tc_add_fdb_peer_flow(f, flow); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index a8e052a5ce36..598ad7e4d5c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -514,7 +514,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) mlx5e_dump_error_cqe(sq, (struct mlx5_err_cqe *)cqe); queue_work(cq->channel->priv->wq, - &sq->recover_work); + &sq->recover.recover_work); } stats->cqe_err++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index a44ea7b85614..5b492b67f4e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1134,13 +1134,6 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, int err = 0; u8 *smac_v; - if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) { - mlx5_core_warn(esw->dev, - "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", - vport->vport); - return -EPERM; - } - esw_vport_cleanup_ingress_rules(esw, vport); if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { @@ -1728,7 +1721,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) int vport_num; int err; - if (!MLX5_ESWITCH_MANAGER(dev)) + if (!MLX5_VPORT_MANAGER(dev)) return 0; esw_info(dev, @@ -1797,7 +1790,7 @@ abort: void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) { - if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev)) + if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) return; esw_info(esw->dev, "cleanup\n"); @@ -1827,13 +1820,10 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - if (evport->info.spoofchk && !is_valid_ether_addr(mac)) { + if (evport->info.spoofchk && !is_valid_ether_addr(mac)) mlx5_core_warn(esw->dev, - "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n", + "Set invalid MAC while spoofchk is on, vport(%d)\n", vport); - err = -EPERM; - goto unlock; - } err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); if (err) { @@ -1979,6 +1969,10 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, evport = &esw->vports[vport]; pschk = evport->info.spoofchk; evport->info.spoofchk = spoofchk; + if (pschk && !is_valid_ether_addr(evport->info.mac)) + mlx5_core_warn(esw->dev, + "Spoofchk in set while MAC is invalid, vport(%d)\n", + evport->vport); if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 79f122b45def..b2df7c3749fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -397,6 +397,7 @@ static void del_hw_flow_table(struct fs_node *node) fs_get_obj(ft, node); dev = get_dev(&ft->node); root = find_root(&ft->node); + trace_mlx5_fs_del_ft(ft); if (node->active) { err = root->cmds->destroy_flow_table(dev, ft); @@ -1019,6 +1020,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa fs_prio->num_ft++; up_write_ref_node(&fs_prio->node); mutex_unlock(&root->chain_lock); + trace_mlx5_fs_add_ft(ft); return ft; destroy_ft: root->cmds->destroy_flow_table(root->dev, ft); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 3a6baed722d8..2d223385dc81 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -616,6 +616,27 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) } } +int mlx5_lag_get_pf_num(struct mlx5_core_dev *dev, int *pf_num) +{ + struct mlx5_lag *ldev; + int n; + + ldev = mlx5_lag_dev_get(dev); + if (!ldev) { + mlx5_core_warn(dev, "no lag device, can't get pf num\n"); + return -EINVAL; + } + + for (n = 0; n < MLX5_MAX_PORTS; n++) + if (ldev->pf[n].dev == dev) { + *pf_num = n; + return 0; + } + + mlx5_core_warn(dev, "wasn't able to locate pf in the lag device\n"); + return -EINVAL; +} + /* Must be called with intf_mutex held */ void mlx5_lag_remove(struct mlx5_core_dev *dev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index c68dcea5985b..5300b0b6d836 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -187,6 +187,8 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) MLX5_CAP_GEN(dev, lag_master); } +int mlx5_lag_get_pf_num(struct mlx5_core_dev *dev, int *pf_num); + void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol); void mlx5_lag_update(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 388f205a497f..370ca94b6775 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -44,14 +44,15 @@ static struct mlx5_core_rsc_common * mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) { struct mlx5_core_rsc_common *common; + unsigned long flags; - spin_lock(&table->lock); + spin_lock_irqsave(&table->lock, flags); common = radix_tree_lookup(&table->tree, rsn); if (common) atomic_inc(&common->refcount); - spin_unlock(&table->lock); + spin_unlock_irqrestore(&table->lock, flags); return common; } diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h index 7a712b6b09ec..14c0c62f8e73 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #ifndef _MLXFW_H #define _MLXFW_H diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c index 2cf89126fb23..240c027e5f07 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw.c - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #define pr_fmt(fmt) "mlxfw: " fmt diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c index 993cb5ba934e..544344ac4894 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #define pr_fmt(fmt) "mlxfw_mfa2: " fmt diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h index 20472aa139cd..5bba6ad79d34 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #ifndef _MLXFW_MFA2_H #define _MLXFW_MFA2_H diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h index f667942b1ea3..874c0a2474ae 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #ifndef _MLXFW_MFA2_FILE_H #define _MLXFW_MFA2_FILE_H diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h index dd66737c033d..b001e5258091 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h @@ -1,36 +1,6 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ + #ifndef _MLXFW_MFA2_FORMAT_H #define _MLXFW_MFA2_FORMAT_H diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h index cc013e77b326..33c971190bba 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #ifndef _MLXFW_MFA2_TLV_H #define _MLXFW_MFA2_TLV_H diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c index 0094b92a233b..017d68f1e123 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #define pr_fmt(fmt) "MFA2: " fmt diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h index 2c667894f3a2..633284eeded7 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h @@ -1,36 +1,6 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ + #ifndef _MLXFW_MFA2_TLV_MULTI_H #define _MLXFW_MFA2_TLV_MULTI_H diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 66b8098c6fd2..a2321fe8d6a0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -604,29 +604,31 @@ static void mlxsw_pci_cq_tasklet(unsigned long data) u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe); u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe); + char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; + + memcpy(ncqe, cqe, q->elem_size); + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); if (sendq) { struct mlxsw_pci_queue *sdq; sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, - wqe_counter, cqe); + wqe_counter, ncqe); q->u.cq.comp_sdq_count++; } else { struct mlxsw_pci_queue *rdq; rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, - wqe_counter, q->u.cq.v, cqe); + wqe_counter, q->u.cq.v, ncqe); q->u.cq.comp_rdq_count++; } if (++items == credits) break; } - if (items) { - mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + if (items) mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); - } } static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q) @@ -1365,10 +1367,10 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY); if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC) - break; + return 0; cond_resched(); } while (time_before(jiffies, end)); - return 0; + return -EBUSY; } static int mlxsw_pci_alloc_irq_vectors(struct mlxsw_pci *mlxsw_pci) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index bb99f6d41fe0..ffee38e36ce8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -27,7 +27,7 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) -#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000 +#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 13000 #define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF @@ -53,6 +53,7 @@ #define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ #define MLXSW_PCI_CQE01_SIZE 16 /* 16 bytes per element */ #define MLXSW_PCI_CQE2_SIZE 32 /* 32 bytes per element */ +#define MLXSW_PCI_CQE_SIZE_MAX MLXSW_PCI_CQE2_SIZE #define MLXSW_PCI_EQE_SIZE 16 /* 16 bytes per element */ #define MLXSW_PCI_WQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE) #define MLXSW_PCI_CQE01_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE01_SIZE) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 9b48dffc9f63..5f8066ab7d40 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5666,6 +5666,8 @@ enum mlxsw_reg_ritr_loopback_protocol { MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4, /* IPinIP IPv6 underlay Unicast */ MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6, + /* IPinIP generic - used for Spectrum-2 underlay RIF */ + MLXSW_REG_RITR_LOOPBACK_GENERIC, }; /* reg_ritr_loopback_protocol @@ -5706,6 +5708,13 @@ MLXSW_ITEM32(reg, ritr, loopback_ipip_options, 0x10, 20, 4); */ MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16); +/* reg_ritr_loopback_ipip_underlay_rif + * Underlay ingress router interface. + * Reserved for Spectrum. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_underlay_rif, 0x14, 0, 16); + /* reg_ritr_loopback_ipip_usip* * Encapsulation Underlay source IP. * Access: RW @@ -5821,11 +5830,12 @@ static inline void mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload, enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, enum mlxsw_reg_ritr_loopback_ipip_options options, - u16 uvr_id, u32 gre_key) + u16 uvr_id, u16 underlay_rif, u32 gre_key) { mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type); mlxsw_reg_ritr_loopback_ipip_options_set(payload, options); mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id); + mlxsw_reg_ritr_loopback_ipip_underlay_rif_set(payload, underlay_rif); mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key); } @@ -5833,12 +5843,12 @@ static inline void mlxsw_reg_ritr_loopback_ipip4_pack(char *payload, enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, enum mlxsw_reg_ritr_loopback_ipip_options options, - u16 uvr_id, u32 usip, u32 gre_key) + u16 uvr_id, u16 underlay_rif, u32 usip, u32 gre_key) { mlxsw_reg_ritr_loopback_protocol_set(payload, MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4); mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options, - uvr_id, gre_key); + uvr_id, underlay_rif, gre_key); mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip); } @@ -7200,6 +7210,13 @@ MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4); */ MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24); +/* reg_rtdp_egress_router_interface + * Underlay egress router interface. + * Valid range is from 0 to cap_max_router_interfaces - 1 + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, egress_router_interface, 0x40, 0, 16); + /* IPinIP */ /* reg_rtdp_ipip_irif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 32519c93df17..a88169738b4a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4094,6 +4094,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops; mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr; mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask; + mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -4110,6 +4111,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; + mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a1c32a81b011..4fe0996c7cdd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -75,6 +75,11 @@ enum mlxsw_sp_rif_type { MLXSW_SP_RIF_TYPE_MAX, }; +struct mlxsw_sp_rif_ops; + +extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[]; +extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[]; + enum mlxsw_sp_fid_type { MLXSW_SP_FID_TYPE_8021Q, MLXSW_SP_FID_TYPE_8021D, @@ -161,6 +166,7 @@ struct mlxsw_sp { const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops; const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; const struct mlxsw_sp_nve_ops **nve_ops_arr; + const struct mlxsw_sp_rif_ops **rif_ops_arr; }; static inline struct mlxsw_sp_upper * @@ -501,6 +507,9 @@ void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, const union mlxsw_sp_l3addr *ul_sip); int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, u16 *vr_id); +int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + u16 *ul_rif_index); +void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index); /* spectrum_kvdl.c */ enum mlxsw_sp_kvdl_entry_type { @@ -712,8 +721,7 @@ struct mlxsw_sp_acl_tcam_ops { void *region_priv, void *chunk_priv, void *entry_priv); int (*entry_action_replace)(struct mlxsw_sp *mlxsw_sp, - void *region_priv, void *chunk_priv, - void *entry_priv, + void *region_priv, void *entry_priv, struct mlxsw_sp_acl_rule_info *rulei); int (*entry_activity_get)(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *entry_priv, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c index fe270c1a26a6..6e444525713f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c @@ -194,8 +194,7 @@ static void mlxsw_sp1_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp1_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, - void *region_priv, void *chunk_priv, - void *entry_priv, + void *region_priv, void *entry_priv, struct mlxsw_sp_acl_rule_info *rulei) { return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c index 234ab51916db..d380b3403960 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c @@ -212,18 +212,15 @@ static void mlxsw_sp2_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp2_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, - void *region_priv, void *chunk_priv, - void *entry_priv, + void *region_priv, void *entry_priv, struct mlxsw_sp_acl_rule_info *rulei) { struct mlxsw_sp2_acl_tcam_region *region = region_priv; - struct mlxsw_sp2_acl_tcam_chunk *chunk = chunk_priv; struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv; entry->act_block = rulei->act_block; return mlxsw_sp_acl_atcam_entry_action_replace(mlxsw_sp, ®ion->aregion, - &chunk->achunk, &entry->aentry, rulei); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 695d33358988..a69e3462b65e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -742,8 +742,7 @@ int mlxsw_sp_acl_rule_action_replace(struct mlxsw_sp *mlxsw_sp, rulei = mlxsw_sp_acl_rule_rulei(rule); rulei->act_block = afa_block; - return ops->rule_action_replace(mlxsw_sp, ruleset->priv, rule->priv, - rule->rulei); + return ops->rule_action_replace(mlxsw_sp, rule->priv, rule->rulei); } struct mlxsw_sp_acl_rule * @@ -806,7 +805,7 @@ static void mlxsw_sp_acl_rule_activity_work_schedule(struct mlxsw_sp_acl *acl) msecs_to_jiffies(interval)); } -static void mlxsw_sp_acl_rul_activity_update_work(struct work_struct *work) +static void mlxsw_sp_acl_rule_activity_update_work(struct work_struct *work) { struct mlxsw_sp_acl *acl = container_of(work, struct mlxsw_sp_acl, rule_activity_update.dw.work); @@ -885,7 +884,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) /* Create the delayed work for the rule activity_update */ INIT_DELAYED_WORK(&acl->rule_activity_update.dw, - mlxsw_sp_acl_rul_activity_update_work); + mlxsw_sp_acl_rule_activity_update_work); acl->rule_activity_update.interval = MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS; mlxsw_core_schedule_dw(&acl->rule_activity_update.dw, 0); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c index 80fb268d51a5..a74a390901ac 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c @@ -7,6 +7,8 @@ #include #include #include +#define CREATE_TRACE_POINTS +#include #include "reg.h" #include "core.h" @@ -390,8 +392,7 @@ mlxsw_sp_acl_atcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, if (err) return err; - lkey_id = aregion->ops->lkey_id_get(aregion, aentry->ht_key.enc_key, - erp_id); + lkey_id = aregion->ops->lkey_id_get(aregion, aentry->enc_key, erp_id); if (IS_ERR(lkey_id)) return PTR_ERR(lkey_id); aentry->lkey_id = lkey_id; @@ -399,7 +400,7 @@ mlxsw_sp_acl_atcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, kvdl_index = mlxsw_afa_block_first_kvdl_index(rulei->act_block); mlxsw_reg_ptce3_pack(ptce3_pl, true, MLXSW_REG_PTCE3_OP_WRITE_WRITE, priority, region->tcam_region_info, - aentry->ht_key.enc_key, erp_id, + aentry->enc_key, erp_id, aentry->delta_info.start, aentry->delta_info.mask, aentry->delta_info.value, @@ -424,12 +425,11 @@ mlxsw_sp_acl_atcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam_lkey_id *lkey_id = aentry->lkey_id; struct mlxsw_sp_acl_tcam_region *region = aregion->region; u8 erp_id = mlxsw_sp_acl_erp_mask_erp_id(aentry->erp_mask); - char *enc_key = aentry->ht_key.enc_key; char ptce3_pl[MLXSW_REG_PTCE3_LEN]; mlxsw_reg_ptce3_pack(ptce3_pl, false, MLXSW_REG_PTCE3_OP_WRITE_WRITE, 0, region->tcam_region_info, - enc_key, erp_id, + aentry->enc_key, erp_id, aentry->delta_info.start, aentry->delta_info.mask, aentry->delta_info.value, @@ -458,7 +458,7 @@ mlxsw_sp_acl_atcam_region_entry_action_replace(struct mlxsw_sp *mlxsw_sp, kvdl_index = mlxsw_afa_block_first_kvdl_index(rulei->act_block); mlxsw_reg_ptce3_pack(ptce3_pl, true, MLXSW_REG_PTCE3_OP_WRITE_UPDATE, priority, region->tcam_region_info, - aentry->ht_key.enc_key, erp_id, + aentry->enc_key, erp_id, aentry->delta_info.start, aentry->delta_info.mask, aentry->delta_info.value, @@ -481,15 +481,15 @@ __mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp, int err; mlxsw_afk_encode(afk, region->key_info, &rulei->values, - aentry->full_enc_key, mask); + aentry->ht_key.full_enc_key, mask); erp_mask = mlxsw_sp_acl_erp_mask_get(aregion, mask, false); if (IS_ERR(erp_mask)) return PTR_ERR(erp_mask); aentry->erp_mask = erp_mask; aentry->ht_key.erp_id = mlxsw_sp_acl_erp_mask_erp_id(erp_mask); - memcpy(aentry->ht_key.enc_key, aentry->full_enc_key, - sizeof(aentry->ht_key.enc_key)); + memcpy(aentry->enc_key, aentry->ht_key.full_enc_key, + sizeof(aentry->enc_key)); /* Compute all needed delta information and clear the delta bits * from the encrypted key. @@ -498,8 +498,9 @@ __mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp, aentry->delta_info.start = mlxsw_sp_acl_erp_delta_start(delta); aentry->delta_info.mask = mlxsw_sp_acl_erp_delta_mask(delta); aentry->delta_info.value = - mlxsw_sp_acl_erp_delta_value(delta, aentry->full_enc_key); - mlxsw_sp_acl_erp_delta_clear(delta, aentry->ht_key.enc_key); + mlxsw_sp_acl_erp_delta_value(delta, + aentry->ht_key.full_enc_key); + mlxsw_sp_acl_erp_delta_clear(delta, aentry->enc_key); /* Add rule to the list of A-TCAM rules, assuming this * rule is intended to A-TCAM. In case this rule does @@ -579,6 +580,7 @@ int mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp, /* It is possible we failed to add the rule to the A-TCAM due to * exceeded number of masks. Try to spill into C-TCAM. */ + trace_mlxsw_sp_acl_atcam_entry_add_ctcam_spill(mlxsw_sp, aregion); err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &aregion->cregion, &achunk->cchunk, &aentry->centry, rulei, true); @@ -603,7 +605,6 @@ void mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_acl_atcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam_region *aregion, - struct mlxsw_sp_acl_atcam_chunk *achunk, struct mlxsw_sp_acl_atcam_entry *aentry, struct mlxsw_sp_acl_rule_info *rulei) { @@ -612,7 +613,6 @@ mlxsw_sp_acl_atcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp_acl_atcam_is_centry(aentry)) err = mlxsw_sp_acl_ctcam_entry_action_replace(mlxsw_sp, &aregion->cregion, - &achunk->cchunk, &aentry->centry, rulei); else diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c index f5c381dcb015..9545b572747e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c @@ -133,7 +133,7 @@ mlxsw_sp_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion, memcpy(chunk + MLXSW_BLOOM_CHUNK_PAD_BYTES, &erp_region_id, sizeof(erp_region_id)); memcpy(chunk + MLXSW_BLOOM_CHUNK_KEY_OFFSET, - &aentry->ht_key.enc_key[chunk_key_offsets[chunk_index]], + &aentry->enc_key[chunk_key_offsets[chunk_index]], MLXSW_BLOOM_CHUNK_KEY_BYTES); chunk += MLXSW_BLOOM_KEY_CHUNK_BYTES; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c index ac222833a5cf..05680a7e6c56 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c @@ -223,7 +223,6 @@ void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_acl_ctcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ctcam_region *cregion, - struct mlxsw_sp_acl_ctcam_chunk *cchunk, struct mlxsw_sp_acl_ctcam_entry *centry, struct mlxsw_sp_acl_rule_info *rulei) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index fe230acf92a9..11456e1f236f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -781,7 +781,6 @@ static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, struct mlxsw_sp_acl_tcam_entry *entry, struct mlxsw_sp_acl_rule_info *rulei) { @@ -789,7 +788,7 @@ mlxsw_sp_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; struct mlxsw_sp_acl_tcam_region *region = chunk->region; - return ops->entry_action_replace(mlxsw_sp, region->priv, chunk->priv, + return ops->entry_action_replace(mlxsw_sp, region->priv, entry->priv, rulei); } @@ -863,15 +862,6 @@ struct mlxsw_sp_acl_tcam_flower_rule { struct mlxsw_sp_acl_tcam_entry entry; }; -struct mlxsw_sp_acl_tcam_mr_ruleset { - struct mlxsw_sp_acl_tcam_chunk *chunk; - struct mlxsw_sp_acl_tcam_group group; -}; - -struct mlxsw_sp_acl_tcam_mr_rule { - struct mlxsw_sp_acl_tcam_entry entry; -}; - static int mlxsw_sp_acl_tcam_flower_ruleset_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam, @@ -955,7 +945,6 @@ mlxsw_sp_acl_tcam_flower_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv) static int mlxsw_sp_acl_tcam_flower_rule_action_replace(struct mlxsw_sp *mlxsw_sp, - void *ruleset_priv, void *rule_priv, struct mlxsw_sp_acl_rule_info *rulei) { @@ -986,6 +975,15 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { .rule_activity_get = mlxsw_sp_acl_tcam_flower_rule_activity_get, }; +struct mlxsw_sp_acl_tcam_mr_ruleset { + struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_group group; +}; + +struct mlxsw_sp_acl_tcam_mr_rule { + struct mlxsw_sp_acl_tcam_entry entry; +}; + static int mlxsw_sp_acl_tcam_mr_ruleset_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam, @@ -1084,14 +1082,13 @@ mlxsw_sp_acl_tcam_mr_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv) static int mlxsw_sp_acl_tcam_mr_rule_action_replace(struct mlxsw_sp *mlxsw_sp, - void *ruleset_priv, void *rule_priv, + void *rule_priv, struct mlxsw_sp_acl_rule_info *rulei) { - struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; - return mlxsw_sp_acl_tcam_entry_action_replace(mlxsw_sp, &ruleset->group, - &rule->entry, rulei); + return mlxsw_sp_acl_tcam_entry_action_replace(mlxsw_sp, &rule->entry, + rulei); } static int diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index 0f1a9dee63de..0858d5b06353 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -48,8 +48,7 @@ struct mlxsw_sp_acl_profile_ops { void *ruleset_priv, void *rule_priv, struct mlxsw_sp_acl_rule_info *rulei); void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv); - int (*rule_action_replace)(struct mlxsw_sp *mlxsw_sp, - void *ruleset_priv, void *rule_priv, + int (*rule_action_replace)(struct mlxsw_sp *mlxsw_sp, void *rule_priv, struct mlxsw_sp_acl_rule_info *rulei); int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv, bool *activity); @@ -126,7 +125,6 @@ void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ctcam_entry *centry); int mlxsw_sp_acl_ctcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ctcam_region *cregion, - struct mlxsw_sp_acl_ctcam_chunk *cchunk, struct mlxsw_sp_acl_ctcam_entry *centry, struct mlxsw_sp_acl_rule_info *rulei); static inline unsigned int @@ -163,9 +161,9 @@ struct mlxsw_sp_acl_atcam_region { }; struct mlxsw_sp_acl_atcam_entry_ht_key { - char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded key, - * minus delta bits. - */ + char full_enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded + * key. + */ u8 erp_id; }; @@ -177,7 +175,9 @@ struct mlxsw_sp_acl_atcam_entry { struct rhash_head ht_node; struct list_head list; /* Member in entries_list */ struct mlxsw_sp_acl_atcam_entry_ht_key ht_key; - char full_enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded key */ + char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded key, + * minus delta bits. + */ struct { u16 start; u8 mask; @@ -224,7 +224,6 @@ void mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam_entry *aentry); int mlxsw_sp_acl_atcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam_region *aregion, - struct mlxsw_sp_acl_atcam_chunk *achunk, struct mlxsw_sp_acl_atcam_entry *aentry, struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 41e607a14846..49933818c6f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -220,7 +220,7 @@ start_again: for (; i < rif_count; i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); - if (!rif) + if (!rif || !mlxsw_sp_rif_dev(rif)) continue; err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, rif, counters_enabled); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 055cc6943b34..9d9aa28684af 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -997,8 +997,8 @@ static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_dummy_ops = { static const struct mlxsw_sp_fid_family mlxsw_sp_fid_dummy_family = { .type = MLXSW_SP_FID_TYPE_DUMMY, .fid_size = sizeof(struct mlxsw_sp_fid), - .start_index = MLXSW_SP_RFID_BASE - 1, - .end_index = MLXSW_SP_RFID_BASE - 1, + .start_index = VLAN_N_VID - 1, + .end_index = VLAN_N_VID - 1, .ops = &mlxsw_sp_fid_dummy_ops, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 00db26c96bf5..6400cd644b7a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -145,6 +145,7 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry) { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb); char rtdp_pl[MLXSW_REG_RTDP_LEN]; struct ip_tunnel_parm parms; unsigned int type_check; @@ -157,6 +158,7 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, ikey = mlxsw_sp_ipip_parms4_ikey(parms); mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index); + mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_id); type_check = has_ikey ? MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY : diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h index 20d99b41611d..0035640156a1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h @@ -28,6 +28,7 @@ struct mlxsw_sp_nve { unsigned int num_nve_tunnels; /* Protected by RTNL */ unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX]; u32 tunnel_index; + u16 ul_rif_index; /* Reserved for Spectrum */ }; struct mlxsw_sp_nve_ops { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c index 9ba0b83bd949..93ccd9fc2266 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -7,6 +7,7 @@ #include #include "reg.h" +#include "spectrum.h" #include "spectrum_nve.h" /* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B) @@ -20,9 +21,9 @@ #define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS (VXLAN_F_UDP_ZERO_CSUM_TX | \ VXLAN_F_LEARN) -static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, - const struct net_device *dev, - struct netlink_ext_ack *extack) +static bool mlxsw_sp_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_config *cfg = &vxlan->cfg; @@ -112,19 +113,11 @@ static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl); } -static int -mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_nve_config *config) +static void +mlxsw_sp_nve_vxlan_config_prepare(char *tngcr_pl, + const struct mlxsw_sp_nve_config *config) { - char tngcr_pl[MLXSW_REG_TNGCR_LEN]; - u16 ul_vr_id; u8 udp_sport; - int err; - - err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id, - &ul_vr_id); - if (err) - return err; mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true, config->ttl); @@ -135,9 +128,25 @@ mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, get_random_bytes(&udp_sport, sizeof(udp_sport)); udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80; mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport); + mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4)); +} + +static int +mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_config *config) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + u16 ul_vr_id; + int err; + + err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id, + &ul_vr_id); + if (err) + return err; + + mlxsw_sp_nve_vxlan_config_prepare(tngcr_pl, config); mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en); mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id); - mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4)); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); } @@ -231,7 +240,7 @@ mlxsw_sp_nve_vxlan_clear_offload(const struct net_device *nve_dev, __be32 vni) const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = { .type = MLXSW_SP_NVE_TYPE_VXLAN, - .can_offload = mlxsw_sp1_nve_vxlan_can_offload, + .can_offload = mlxsw_sp_nve_vxlan_can_offload, .nve_config = mlxsw_sp_nve_vxlan_config, .init = mlxsw_sp1_nve_vxlan_init, .fini = mlxsw_sp1_nve_vxlan_fini, @@ -239,26 +248,126 @@ const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = { .fdb_clear_offload = mlxsw_sp_nve_vxlan_clear_offload, }; -static bool mlxsw_sp2_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, - const struct net_device *dev, - struct netlink_ext_ack *extack) +static bool mlxsw_sp2_nve_vxlan_learning_set(struct mlxsw_sp *mlxsw_sp, + bool learning_en) { - return false; + char tnpc_pl[MLXSW_REG_TNPC_LEN]; + + mlxsw_reg_tnpc_pack(tnpc_pl, MLXSW_REG_TNPC_TUNNEL_PORT_NVE, + learning_en); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnpc), tnpc_pl); +} + +static int +mlxsw_sp2_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_config *config) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + u16 ul_rif_index; + int err; + + err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, config->ul_tb_id, + &ul_rif_index); + if (err) + return err; + mlxsw_sp->nve->ul_rif_index = ul_rif_index; + + err = mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, config->learning_en); + if (err) + goto err_vxlan_learning_set; + + mlxsw_sp_nve_vxlan_config_prepare(tngcr_pl, config); + mlxsw_reg_tngcr_underlay_rif_set(tngcr_pl, ul_rif_index); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); + if (err) + goto err_tngcr_write; + + return 0; + +err_tngcr_write: + mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, false); +err_vxlan_learning_set: + mlxsw_sp_router_ul_rif_put(mlxsw_sp, ul_rif_index); + return err; +} + +static void mlxsw_sp2_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); + mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, false); + mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->nve->ul_rif_index); +} + +static int mlxsw_sp2_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp, + unsigned int tunnel_index, + u16 ul_rif_index) +{ + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index); + mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_index); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); } static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve, const struct mlxsw_sp_nve_config *config) { - return -EOPNOTSUPP; + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + int err; + + err = mlxsw_sp_nve_parsing_set(mlxsw_sp, + MLXSW_SP_NVE_VXLAN_PARSING_DEPTH, + config->udp_dport); + if (err) + return err; + + err = mlxsw_sp2_nve_vxlan_config_set(mlxsw_sp, config); + if (err) + goto err_config_set; + + err = mlxsw_sp2_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index, + nve->ul_rif_index); + if (err) + goto err_rtdp_set; + + err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, + &config->ul_sip, + nve->tunnel_index); + if (err) + goto err_promote_decap; + + return 0; + +err_promote_decap: +err_rtdp_set: + mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp); +err_config_set: + mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH, + config->udp_dport); + return err; } static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve) { + struct mlxsw_sp_nve_config *config = &nve->config; + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + + mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, &config->ul_sip); + mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp); + mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH, + config->udp_dport); } const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = { .type = MLXSW_SP_NVE_TYPE_VXLAN, - .can_offload = mlxsw_sp2_nve_vxlan_can_offload, + .can_offload = mlxsw_sp_nve_vxlan_can_offload, .nve_config = mlxsw_sp_nve_vxlan_config, .init = mlxsw_sp2_nve_vxlan_init, .fini = mlxsw_sp2_nve_vxlan_fini, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 042341c7f6b9..230e1f6e192b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -80,7 +80,7 @@ struct mlxsw_sp_router { struct mlxsw_sp_rif { struct list_head nexthop_list; struct list_head neigh_list; - struct net_device *dev; + struct net_device *dev; /* NULL for underlay RIF */ struct mlxsw_sp_fid *fid; unsigned char addr[ETH_ALEN]; int mtu; @@ -120,6 +120,7 @@ struct mlxsw_sp_rif_ipip_lb { struct mlxsw_sp_rif common; struct mlxsw_sp_rif_ipip_lb_config lb_config; u16 ul_vr_id; /* Reserved for Spectrum-2. */ + u16 ul_rif_id; /* Reserved for Spectrum. */ }; struct mlxsw_sp_rif_params_ipip_lb { @@ -440,6 +441,8 @@ struct mlxsw_sp_vr { struct mlxsw_sp_fib *fib4; struct mlxsw_sp_fib *fib6; struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX]; + struct mlxsw_sp_rif *ul_rif; + refcount_t ul_rif_refcnt; }; static const struct rhashtable_params mlxsw_sp_fib_ht_params; @@ -1437,8 +1440,8 @@ mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, - struct mlxsw_sp_vr *ul_vr, bool enable) +mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id, + u16 ul_rif_id, bool enable) { struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config; struct mlxsw_sp_rif *rif = &lb_rif->common; @@ -1453,7 +1456,7 @@ mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, rif->rif_index, rif->vr_id, rif->dev->mtu); mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt, MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET, - ul_vr->id, saddr4, lb_cf.okey); + ul_vr_id, ul_rif_id, saddr4, lb_cf.okey); break; case MLXSW_SP_L3_PROTO_IPV6: @@ -1468,14 +1471,13 @@ static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_ipip_entry *ipip_entry; struct mlxsw_sp_rif_ipip_lb *lb_rif; - struct mlxsw_sp_vr *ul_vr; int err = 0; ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); if (ipip_entry) { lb_rif = ipip_entry->ol_lb; - ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id]; - err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true); + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id, + lb_rif->ul_rif_id, true); if (err) goto out; lb_rif->common.mtu = ol_dev->mtu; @@ -6224,10 +6226,12 @@ static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, INIT_LIST_HEAD(&rif->nexthop_list); INIT_LIST_HEAD(&rif->neigh_list); - ether_addr_copy(rif->addr, l3_dev->dev_addr); - rif->mtu = l3_dev->mtu; + if (l3_dev) { + ether_addr_copy(rif->addr, l3_dev->dev_addr); + rif->mtu = l3_dev->mtu; + rif->dev = l3_dev; + } rif->vr_id = vr_id; - rif->dev = l3_dev; rif->rif_index = rif_index; return rif; @@ -6251,7 +6255,19 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) { - return lb_rif->ul_vr_id; + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev); + struct mlxsw_sp_vr *ul_vr; + + ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL); + if (WARN_ON(IS_ERR(ul_vr))) + return 0; + + return ul_vr->id; +} + +u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +{ + return lb_rif->ul_rif_id; } int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) @@ -6284,7 +6300,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, int i, err; type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev); - ops = mlxsw_sp->router->rif_ops_arr[type]; + ops = mlxsw_sp->rif_ops_arr[type]; vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack); if (IS_ERR(vr)) @@ -6303,6 +6319,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, goto err_rif_alloc; } dev_hold(rif->dev); + mlxsw_sp->router->rifs[rif_index] = rif; rif->mlxsw_sp = mlxsw_sp; rif->ops = ops; @@ -6329,7 +6346,6 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, } mlxsw_sp_rif_counters_alloc(rif); - mlxsw_sp->router->rifs[rif_index] = rif; return rif; @@ -6341,6 +6357,7 @@ err_configure: if (fid) mlxsw_sp_fid_put(fid); err_fid_get: + mlxsw_sp->router->rifs[rif_index] = NULL; dev_put(rif->dev); kfree(rif); err_rif_alloc: @@ -6361,7 +6378,6 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; - mlxsw_sp->router->rifs[rif->rif_index] = NULL; mlxsw_sp_rif_counters_free(rif); for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); @@ -6369,6 +6385,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) if (fid) /* Loopback RIFs are not associated with a FID. */ mlxsw_sp_fid_put(fid); + mlxsw_sp->router->rifs[rif->rif_index] = NULL; dev_put(rif->dev); kfree(rif); vr->rif_count--; @@ -6750,7 +6767,7 @@ static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { rif = mlxsw_sp->router->rifs[i]; - if (rif && rif->dev != dev && + if (rif && rif->dev && rif->dev != dev && !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr, mlxsw_sp->mac_mask)) { NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix"); @@ -7424,7 +7441,7 @@ mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif, } static int -mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) +mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) { struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); @@ -7436,11 +7453,12 @@ mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) if (IS_ERR(ul_vr)) return PTR_ERR(ul_vr); - err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true); + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true); if (err) goto err_loopback_op; lb_rif->ul_vr_id = ul_vr->id; + lb_rif->ul_rif_id = 0; ++ul_vr->rif_count; return 0; @@ -7449,32 +7467,213 @@ err_loopback_op: return err; } -static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) +static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) { struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_vr *ul_vr; ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id]; - mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false); + mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false); --ul_vr->rif_count; mlxsw_sp_vr_put(mlxsw_sp, ul_vr); } -static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = { +static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = { .type = MLXSW_SP_RIF_TYPE_IPIP_LB, .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb), .setup = mlxsw_sp_rif_ipip_lb_setup, - .configure = mlxsw_sp_rif_ipip_lb_configure, - .deconfigure = mlxsw_sp_rif_ipip_lb_deconfigure, + .configure = mlxsw_sp1_rif_ipip_lb_configure, + .deconfigure = mlxsw_sp1_rif_ipip_lb_deconfigure, }; -static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = { +const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, - [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp_rif_ipip_lb_ops, + [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops, +}; + +static int +mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, + ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU); + mlxsw_reg_ritr_loopback_protocol_set(ritr_pl, + MLXSW_REG_RITR_LOOPBACK_GENERIC); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif *ul_rif; + u16 rif_index; + int err; + + err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); + return ERR_PTR(err); + } + + ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL); + if (!ul_rif) + return ERR_PTR(-ENOMEM); + + mlxsw_sp->router->rifs[rif_index] = ul_rif; + ul_rif->mlxsw_sp = mlxsw_sp; + err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true); + if (err) + goto ul_rif_op_err; + + return ul_rif; + +ul_rif_op_err: + mlxsw_sp->router->rifs[rif_index] = NULL; + kfree(ul_rif); + return ERR_PTR(err); +} + +static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif) +{ + struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + + mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false); + mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL; + kfree(ul_rif); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_vr *vr; + int err; + + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack); + if (IS_ERR(vr)) + return ERR_CAST(vr); + + if (refcount_inc_not_zero(&vr->ul_rif_refcnt)) + return vr->ul_rif; + + vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack); + if (IS_ERR(vr->ul_rif)) { + err = PTR_ERR(vr->ul_rif); + goto err_ul_rif_create; + } + + vr->rif_count++; + refcount_set(&vr->ul_rif_refcnt, 1); + + return vr->ul_rif; + +err_ul_rif_create: + mlxsw_sp_vr_put(mlxsw_sp, vr); + return ERR_PTR(err); +} + +static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif) +{ + struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + struct mlxsw_sp_vr *vr; + + vr = &mlxsw_sp->router->vrs[ul_rif->vr_id]; + + if (!refcount_dec_and_test(&vr->ul_rif_refcnt)) + return; + + vr->rif_count--; + mlxsw_sp_ul_rif_destroy(ul_rif); + mlxsw_sp_vr_put(mlxsw_sp, vr); +} + +int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + u16 *ul_rif_index) +{ + struct mlxsw_sp_rif *ul_rif; + + ASSERT_RTNL(); + + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + if (IS_ERR(ul_rif)) + return PTR_ERR(ul_rif); + *ul_rif_index = ul_rif->rif_index; + + return 0; +} + +void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index) +{ + struct mlxsw_sp_rif *ul_rif; + + ASSERT_RTNL(); + + ul_rif = mlxsw_sp->router->rifs[ul_rif_index]; + if (WARN_ON(!ul_rif)) + return; + + mlxsw_sp_ul_rif_put(ul_rif); +} + +static int +mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif *ul_rif; + int err; + + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + if (IS_ERR(ul_rif)) + return PTR_ERR(ul_rif); + + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true); + if (err) + goto err_loopback_op; + + lb_rif->ul_vr_id = 0; + lb_rif->ul_rif_id = ul_rif->rif_index; + + return 0; + +err_loopback_op: + mlxsw_sp_ul_rif_put(ul_rif); + return err; +} + +static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif *ul_rif; + + ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id); + mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false); + mlxsw_sp_ul_rif_put(ul_rif); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = { + .type = MLXSW_SP_RIF_TYPE_IPIP_LB, + .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb), + .setup = mlxsw_sp_rif_ipip_lb_setup, + .configure = mlxsw_sp2_rif_ipip_lb_configure, + .deconfigure = mlxsw_sp2_rif_ipip_lb_deconfigure, +}; + +const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = { + [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, + [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, + [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, + [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops, }; static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) @@ -7487,8 +7686,6 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp->router->rifs) return -ENOMEM; - mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr; - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 3dbafdeaab2b..cc1de91e8217 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -29,6 +29,7 @@ struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); +u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif); u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 8ab796e0c0d7..a4a9fe992193 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1233,7 +1233,7 @@ mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic) { return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : - MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; + MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_MLAG; } static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) @@ -1290,7 +1290,7 @@ out: static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, const char *mac, u16 fid, bool adding, enum mlxsw_reg_sfd_rec_action action, - bool dynamic) + enum mlxsw_reg_sfd_rec_policy policy) { char *sfd_pl; u8 num_rec; @@ -1301,8 +1301,7 @@ static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, return -ENOMEM; mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); - mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), - mac, fid, action, local_port); + mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, mac, fid, action, local_port); num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); if (err) @@ -1321,7 +1320,8 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, bool dynamic) { return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding, - MLXSW_REG_SFD_REC_ACTION_NOP, dynamic); + MLXSW_REG_SFD_REC_ACTION_NOP, + mlxsw_sp_sfd_rec_policy(dynamic)); } int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, @@ -1329,7 +1329,7 @@ int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, { return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, 0, mac, fid, adding, MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER, - false); + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY); } static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, @@ -2027,6 +2027,7 @@ mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, return 0; if (mlxsw_sp_fid_vni_is_set(fid)) { + NL_SET_ERR_MSG_MOD(extack, "VNI is already set on FID"); err = -EINVAL; goto err_vni_exists; } @@ -2213,10 +2214,13 @@ mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, int err; fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); - if (!fid) + if (!fid) { + NL_SET_ERR_MSG_MOD(extack, "Did not find a corresponding FID"); return -EINVAL; + } if (mlxsw_sp_fid_vni_is_set(fid)) { + NL_SET_ERR_MSG_MOD(extack, "VNI is already set on FID"); err = -EINVAL; goto err_vni_exists; } @@ -3231,8 +3235,10 @@ mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp, * the lookup function to return 'vxlan_dev' */ if (flag_untagged && flag_pvid && - mlxsw_sp_bridge_8021q_vxlan_dev_find(bridge_device->dev, vid)) + mlxsw_sp_bridge_8021q_vxlan_dev_find(bridge_device->dev, vid)) { + NL_SET_ERR_MSG_MOD(extack, "VLAN already mapped to a different VNI"); return -EINVAL; + } if (!netif_running(vxlan_dev)) return 0; diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index 07c1eb63415a..3a0b289d9771 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -14,61 +14,138 @@ #define EEPROM_INDICATOR_1 (0xA5) #define EEPROM_INDICATOR_2 (0xAA) #define EEPROM_MAC_OFFSET (0x01) -#define MAX_EEPROM_SIZE 512 +#define MAX_EEPROM_SIZE (512) +#define MAX_OTP_SIZE (1024) #define OTP_INDICATOR_1 (0xF3) #define OTP_INDICATOR_2 (0xF7) +static int lan743x_otp_power_up(struct lan743x_adapter *adapter) +{ + u32 reg_value; + + reg_value = lan743x_csr_read(adapter, OTP_PWR_DN); + + if (reg_value & OTP_PWR_DN_PWRDN_N_) { + /* clear it and wait to be cleared */ + reg_value &= ~OTP_PWR_DN_PWRDN_N_; + lan743x_csr_write(adapter, OTP_PWR_DN, reg_value); + + usleep_range(100, 20000); + } + + return 0; +} + +static void lan743x_otp_power_down(struct lan743x_adapter *adapter) +{ + u32 reg_value; + + reg_value = lan743x_csr_read(adapter, OTP_PWR_DN); + if (!(reg_value & OTP_PWR_DN_PWRDN_N_)) { + /* set power down bit */ + reg_value |= OTP_PWR_DN_PWRDN_N_; + lan743x_csr_write(adapter, OTP_PWR_DN, reg_value); + } +} + +static void lan743x_otp_set_address(struct lan743x_adapter *adapter, + u32 address) +{ + lan743x_csr_write(adapter, OTP_ADDR_HIGH, (address >> 8) & 0x03); + lan743x_csr_write(adapter, OTP_ADDR_LOW, address & 0xFF); +} + +static void lan743x_otp_read_go(struct lan743x_adapter *adapter) +{ + lan743x_csr_write(adapter, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_); + lan743x_csr_write(adapter, OTP_CMD_GO, OTP_CMD_GO_GO_); +} + +static int lan743x_otp_wait_till_not_busy(struct lan743x_adapter *adapter) +{ + unsigned long timeout; + u32 reg_val; + + timeout = jiffies + HZ; + do { + if (time_after(jiffies, timeout)) { + netif_warn(adapter, drv, adapter->netdev, + "Timeout on OTP_STATUS completion\n"); + return -EIO; + } + udelay(1); + reg_val = lan743x_csr_read(adapter, OTP_STATUS); + } while (reg_val & OTP_STATUS_BUSY_); + + return 0; +} + +static int lan743x_otp_read(struct lan743x_adapter *adapter, u32 offset, + u32 length, u8 *data) +{ + int ret; + int i; + + if (offset + length > MAX_OTP_SIZE) + return -EINVAL; + + ret = lan743x_otp_power_up(adapter); + if (ret < 0) + return ret; + + ret = lan743x_otp_wait_till_not_busy(adapter); + if (ret < 0) + return ret; + + for (i = 0; i < length; i++) { + lan743x_otp_set_address(adapter, offset + i); + + lan743x_otp_read_go(adapter); + ret = lan743x_otp_wait_till_not_busy(adapter); + if (ret < 0) + return ret; + data[i] = lan743x_csr_read(adapter, OTP_READ_DATA); + } + + lan743x_otp_power_down(adapter); + + return 0; +} + static int lan743x_otp_write(struct lan743x_adapter *adapter, u32 offset, u32 length, u8 *data) { - unsigned long timeout; - u32 buf; + int ret; int i; - buf = lan743x_csr_read(adapter, OTP_PWR_DN); + if (offset + length > MAX_OTP_SIZE) + return -EINVAL; - if (buf & OTP_PWR_DN_PWRDN_N_) { - /* clear it and wait to be cleared */ - lan743x_csr_write(adapter, OTP_PWR_DN, 0); + ret = lan743x_otp_power_up(adapter); + if (ret < 0) + return ret; - timeout = jiffies + HZ; - do { - udelay(1); - buf = lan743x_csr_read(adapter, OTP_PWR_DN); - if (time_after(jiffies, timeout)) { - netif_warn(adapter, drv, adapter->netdev, - "timeout on OTP_PWR_DN completion\n"); - return -EIO; - } - } while (buf & OTP_PWR_DN_PWRDN_N_); - } + ret = lan743x_otp_wait_till_not_busy(adapter); + if (ret < 0) + return ret; /* set to BYTE program mode */ lan743x_csr_write(adapter, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_); for (i = 0; i < length; i++) { - lan743x_csr_write(adapter, OTP_ADDR1, - ((offset + i) >> 8) & - OTP_ADDR1_15_11_MASK_); - lan743x_csr_write(adapter, OTP_ADDR2, - ((offset + i) & - OTP_ADDR2_10_3_MASK_)); + lan743x_otp_set_address(adapter, offset + i); + lan743x_csr_write(adapter, OTP_PRGM_DATA, data[i]); lan743x_csr_write(adapter, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_); lan743x_csr_write(adapter, OTP_CMD_GO, OTP_CMD_GO_GO_); - timeout = jiffies + HZ; - do { - udelay(1); - buf = lan743x_csr_read(adapter, OTP_STATUS); - if (time_after(jiffies, timeout)) { - netif_warn(adapter, drv, adapter->netdev, - "Timeout on OTP_STATUS completion\n"); - return -EIO; - } - } while (buf & OTP_STATUS_BUSY_); + ret = lan743x_otp_wait_till_not_busy(adapter); + if (ret < 0) + return ret; } + lan743x_otp_power_down(adapter); + return 0; } @@ -120,6 +197,9 @@ static int lan743x_eeprom_read(struct lan743x_adapter *adapter, u32 val; int i; + if (offset + length > MAX_EEPROM_SIZE) + return -EINVAL; + retval = lan743x_eeprom_confirm_not_busy(adapter); if (retval) return retval; @@ -148,6 +228,9 @@ static int lan743x_eeprom_write(struct lan743x_adapter *adapter, u32 val; int i; + if (offset + length > MAX_EEPROM_SIZE) + return -EINVAL; + retval = lan743x_eeprom_confirm_not_busy(adapter); if (retval) return retval; @@ -207,6 +290,11 @@ static void lan743x_ethtool_set_msglevel(struct net_device *netdev, static int lan743x_ethtool_get_eeprom_len(struct net_device *netdev) { + struct lan743x_adapter *adapter = netdev_priv(netdev); + + if (adapter->flags & LAN743X_ADAPTER_FLAG_OTP) + return MAX_OTP_SIZE; + return MAX_EEPROM_SIZE; } @@ -214,8 +302,14 @@ static int lan743x_ethtool_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, u8 *data) { struct lan743x_adapter *adapter = netdev_priv(netdev); + int ret = 0; - return lan743x_eeprom_read(adapter, ee->offset, ee->len, data); + if (adapter->flags & LAN743X_ADAPTER_FLAG_OTP) + ret = lan743x_otp_read(adapter, ee->offset, ee->len, data); + else + ret = lan743x_eeprom_read(adapter, ee->offset, ee->len, data); + + return ret; } static int lan743x_ethtool_set_eeprom(struct net_device *netdev, @@ -224,17 +318,18 @@ static int lan743x_ethtool_set_eeprom(struct net_device *netdev, struct lan743x_adapter *adapter = netdev_priv(netdev); int ret = -EINVAL; - if (ee->magic == LAN743X_EEPROM_MAGIC) - ret = lan743x_eeprom_write(adapter, ee->offset, ee->len, - data); - /* Beware! OTP is One Time Programming ONLY! - * So do some strict condition check before messing up - */ - else if ((ee->magic == LAN743X_OTP_MAGIC) && - (ee->offset == 0) && - (ee->len == MAX_EEPROM_SIZE) && - (data[0] == OTP_INDICATOR_1)) - ret = lan743x_otp_write(adapter, ee->offset, ee->len, data); + if (adapter->flags & LAN743X_ADAPTER_FLAG_OTP) { + /* Beware! OTP is One Time Programming ONLY! */ + if (ee->magic == LAN743X_OTP_MAGIC) { + ret = lan743x_otp_write(adapter, ee->offset, + ee->len, data); + } + } else { + if (ee->magic == LAN743X_EEPROM_MAGIC) { + ret = lan743x_eeprom_write(adapter, ee->offset, + ee->len, data); + } + } return ret; } @@ -360,6 +455,10 @@ static const u32 lan743x_set2_hw_cnt_addr[] = { STAT_TX_COUNTER_ROLLOVER_STATUS }; +static const char lan743x_priv_flags_strings[][ETH_GSTRING_LEN] = { + "OTP_ACCESS", +}; + static void lan743x_ethtool_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { @@ -375,6 +474,10 @@ static void lan743x_ethtool_get_strings(struct net_device *netdev, lan743x_set2_hw_cnt_strings, sizeof(lan743x_set2_hw_cnt_strings)); break; + case ETH_SS_PRIV_FLAGS: + memcpy(data, lan743x_priv_flags_strings, + sizeof(lan743x_priv_flags_strings)); + break; } } @@ -399,6 +502,22 @@ static void lan743x_ethtool_get_ethtool_stats(struct net_device *netdev, } } +static u32 lan743x_ethtool_get_priv_flags(struct net_device *netdev) +{ + struct lan743x_adapter *adapter = netdev_priv(netdev); + + return adapter->flags; +} + +static int lan743x_ethtool_set_priv_flags(struct net_device *netdev, u32 flags) +{ + struct lan743x_adapter *adapter = netdev_priv(netdev); + + adapter->flags = flags; + + return 0; +} + static int lan743x_ethtool_get_sset_count(struct net_device *netdev, int sset) { switch (sset) { @@ -411,6 +530,8 @@ static int lan743x_ethtool_get_sset_count(struct net_device *netdev, int sset) ret += ARRAY_SIZE(lan743x_set2_hw_cnt_strings); return ret; } + case ETH_SS_PRIV_FLAGS: + return ARRAY_SIZE(lan743x_priv_flags_strings); default: return -EOPNOTSUPP; } @@ -705,6 +826,8 @@ const struct ethtool_ops lan743x_ethtool_ops = { .set_eeprom = lan743x_ethtool_set_eeprom, .get_strings = lan743x_ethtool_get_strings, .get_ethtool_stats = lan743x_ethtool_get_ethtool_stats, + .get_priv_flags = lan743x_ethtool_get_priv_flags, + .set_priv_flags = lan743x_ethtool_set_priv_flags, .get_sset_count = lan743x_ethtool_get_sset_count, .get_rxnfc = lan743x_ethtool_get_rxnfc, .get_rxfh_key_size = lan743x_ethtool_get_rxfh_key_size, diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 2d6eea18973e..3b02eeae5f45 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -26,6 +26,8 @@ #define FPGA_REV_GET_MAJOR_(fpga_rev) ((fpga_rev) & 0x000000FF) #define HW_CFG (0x010) +#define HW_CFG_RELOAD_TYPE_ALL_ (0x00000FC0) +#define HW_CFG_EE_OTP_RELOAD_ BIT(4) #define HW_CFG_LRST_ BIT(1) #define PMT_CTL (0x014) @@ -453,17 +455,19 @@ #define OTP_PWR_DN (0x1000) #define OTP_PWR_DN_PWRDN_N_ BIT(0) -#define OTP_ADDR1 (0x1004) -#define OTP_ADDR1_15_11_MASK_ (0x1F) - -#define OTP_ADDR2 (0x1008) -#define OTP_ADDR2_10_3_MASK_ (0xFF) +#define OTP_ADDR_HIGH (0x1004) +#define OTP_ADDR_LOW (0x1008) #define OTP_PRGM_DATA (0x1010) #define OTP_PRGM_MODE (0x1014) #define OTP_PRGM_MODE_BYTE_ BIT(0) +#define OTP_READ_DATA (0x1018) + +#define OTP_FUNC_CMD (0x1020) +#define OTP_FUNC_CMD_READ_ BIT(0) + #define OTP_TST_CMD (0x1024) #define OTP_TST_CMD_PRGVRFY_ BIT(3) @@ -713,6 +717,9 @@ struct lan743x_adapter { struct lan743x_phy phy; struct lan743x_tx tx[LAN743X_MAX_TX_CHANNELS]; struct lan743x_rx rx[LAN743X_MAX_RX_CHANNELS]; + +#define LAN743X_ADAPTER_FLAG_OTP BIT(0) + u32 flags; }; #define LAN743X_COMPONENT_FLAG_RX(channel) BIT(20 + (channel)) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index e23ca90289f7..aa3a2098a583 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1266,7 +1266,7 @@ wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u64 imm = insn->imm; /* sign extend */ if (skip) { - meta->skip = true; + meta->flags |= FLAG_INSN_SKIP_NOOP; return 0; } @@ -1296,7 +1296,7 @@ wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, const struct bpf_insn *insn = &meta->insn; if (skip) { - meta->skip = true; + meta->flags |= FLAG_INSN_SKIP_NOOP; return 0; } @@ -1334,8 +1334,9 @@ wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, insn->src_reg * 2, br_mask, insn->off); - wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, - insn->src_reg * 2 + 1, br_mask, insn->off); + if (is_mbpf_jmp64(meta)) + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, + insn->src_reg * 2 + 1, br_mask, insn->off); return 0; } @@ -1390,13 +1391,15 @@ static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) else emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg)); - tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); - if (!code->swap) - emit_alu(nfp_prog, reg_none(), - reg_a(reg + 1), carry_op, tmp_reg); - else - emit_alu(nfp_prog, reg_none(), - tmp_reg, carry_op, reg_a(reg + 1)); + if (is_mbpf_jmp64(meta)) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + if (!code->swap) + emit_alu(nfp_prog, reg_none(), + reg_a(reg + 1), carry_op, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), + tmp_reg, carry_op, reg_a(reg + 1)); + } emit_br(nfp_prog, code->br_mask, insn->off, 0); @@ -1423,8 +1426,9 @@ static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) } emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); - emit_alu(nfp_prog, reg_none(), - reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); + if (is_mbpf_jmp64(meta)) + emit_alu(nfp_prog, reg_none(), + reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); emit_br(nfp_prog, code->br_mask, insn->off, 0); return 0; @@ -3048,6 +3052,19 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int jeq32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + swreg tmp_reg; + + tmp_reg = ur_load_imm_any(nfp_prog, insn->imm, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; @@ -3061,9 +3078,10 @@ static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) /* Upper word of the mask can only be 0 or ~0 from sign extension, * so either ignore it or OR the whole thing in. */ - if (imm >> 32) + if (is_mbpf_jmp64(meta) && imm >> 32) { emit_alu(nfp_prog, reg_none(), reg_a(dst_gpr + 1), ALU_OP_OR, imm_b(nfp_prog)); + } emit_br(nfp_prog, BR_BNE, insn->off, 0); return 0; @@ -3073,11 +3091,16 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; u64 imm = insn->imm; /* sign extend */ + bool is_jmp32 = is_mbpf_jmp32(meta); swreg tmp_reg; if (!imm) { - emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), - ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); + if (is_jmp32) + emit_alu(nfp_prog, reg_none(), reg_none(), ALU_OP_NONE, + reg_b(insn->dst_reg * 2)); + else + emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), + ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); emit_br(nfp_prog, BR_BNE, insn->off, 0); return 0; } @@ -3087,6 +3110,9 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); emit_br(nfp_prog, BR_BNE, insn->off, 0); + if (is_jmp32) + return 0; + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); @@ -3101,10 +3127,13 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), ALU_OP_XOR, reg_b(insn->src_reg * 2)); - emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1), - ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1)); - emit_alu(nfp_prog, reg_none(), - imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog)); + if (is_mbpf_jmp64(meta)) { + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, + reg_b(insn->src_reg * 2 + 1)); + emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, + imm_b(nfp_prog)); + } emit_br(nfp_prog, BR_BEQ, insn->off, 0); return 0; @@ -3182,7 +3211,7 @@ bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL); } else { ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; - emit_br(nfp_prog, BR_UNC, meta->n + 1 + meta->insn.imm, 1); + emit_br(nfp_prog, BR_UNC, meta->insn.imm, 1); offset_br = nfp_prog_current_offset(nfp_prog); } wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL); @@ -3369,6 +3398,28 @@ static const instr_cb_t instr_cb[256] = { [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg, [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, + [BPF_JMP32 | BPF_JEQ | BPF_K] = jeq32_imm, + [BPF_JMP32 | BPF_JGT | BPF_K] = cmp_imm, + [BPF_JMP32 | BPF_JGE | BPF_K] = cmp_imm, + [BPF_JMP32 | BPF_JLT | BPF_K] = cmp_imm, + [BPF_JMP32 | BPF_JLE | BPF_K] = cmp_imm, + [BPF_JMP32 | BPF_JSGT | BPF_K] =cmp_imm, + [BPF_JMP32 | BPF_JSGE | BPF_K] =cmp_imm, + [BPF_JMP32 | BPF_JSLT | BPF_K] =cmp_imm, + [BPF_JMP32 | BPF_JSLE | BPF_K] =cmp_imm, + [BPF_JMP32 | BPF_JSET | BPF_K] =jset_imm, + [BPF_JMP32 | BPF_JNE | BPF_K] = jne_imm, + [BPF_JMP32 | BPF_JEQ | BPF_X] = jeq_reg, + [BPF_JMP32 | BPF_JGT | BPF_X] = cmp_reg, + [BPF_JMP32 | BPF_JGE | BPF_X] = cmp_reg, + [BPF_JMP32 | BPF_JLT | BPF_X] = cmp_reg, + [BPF_JMP32 | BPF_JLE | BPF_X] = cmp_reg, + [BPF_JMP32 | BPF_JSGT | BPF_X] =cmp_reg, + [BPF_JMP32 | BPF_JSGE | BPF_X] =cmp_reg, + [BPF_JMP32 | BPF_JSLT | BPF_X] =cmp_reg, + [BPF_JMP32 | BPF_JSLE | BPF_X] =cmp_reg, + [BPF_JMP32 | BPF_JSET | BPF_X] =jset_reg, + [BPF_JMP32 | BPF_JNE | BPF_X] = jne_reg, [BPF_JMP | BPF_CALL] = call, [BPF_JMP | BPF_EXIT] = jmp_exit, }; @@ -3395,9 +3446,9 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) int err; list_for_each_entry(meta, &nfp_prog->insns, l) { - if (meta->skip) + if (meta->flags & FLAG_INSN_SKIP_MASK) continue; - if (BPF_CLASS(meta->insn.code) != BPF_JMP) + if (!is_mbpf_jmp(meta)) continue; if (meta->insn.code == (BPF_JMP | BPF_EXIT) && !nfp_is_main_function(meta)) @@ -3439,7 +3490,7 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) jmp_dst = meta->jmp_dst; - if (jmp_dst->skip) { + if (jmp_dst->flags & FLAG_INSN_SKIP_PREC_DEPENDENT) { pr_err("Branch landing on removed instruction!!\n"); return -ELOOP; } @@ -3689,7 +3740,7 @@ static int nfp_translate(struct nfp_prog *nfp_prog) return nfp_prog->error; } - if (meta->skip) { + if (meta->flags & FLAG_INSN_SKIP_MASK) { nfp_prog->n_translated++; continue; } @@ -3737,10 +3788,10 @@ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) /* Programs start with R6 = R1 but we ignore the skb pointer */ if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn.src_reg == 1 && insn.dst_reg == 6) - meta->skip = true; + meta->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; /* Return as soon as something doesn't match */ - if (!meta->skip) + if (!(meta->flags & FLAG_INSN_SKIP_MASK)) return; } } @@ -3755,19 +3806,17 @@ static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog) list_for_each_entry(meta, &nfp_prog->insns, l) { struct bpf_insn insn = meta->insn; - if (meta->skip) + if (meta->flags & FLAG_INSN_SKIP_MASK) continue; - if (BPF_CLASS(insn.code) != BPF_ALU && - BPF_CLASS(insn.code) != BPF_ALU64 && - BPF_CLASS(insn.code) != BPF_JMP) + if (!is_mbpf_alu(meta) && !is_mbpf_jmp(meta)) continue; if (BPF_SRC(insn.code) != BPF_K) continue; if (insn.imm >= 0) continue; - if (BPF_CLASS(insn.code) == BPF_JMP) { + if (is_mbpf_jmp(meta)) { switch (BPF_OP(insn.code)) { case BPF_JGE: case BPF_JSGE: @@ -3829,7 +3878,7 @@ static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) if (meta2->flags & FLAG_INSN_IS_JUMP_DST) continue; - meta2->skip = true; + meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; } } @@ -3869,8 +3918,8 @@ static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) meta3->flags & FLAG_INSN_IS_JUMP_DST) continue; - meta2->skip = true; - meta3->skip = true; + meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; + meta3->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; } } @@ -4065,7 +4114,8 @@ static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog) } head_ld_meta->paired_st = &head_st_meta->insn; - head_st_meta->skip = true; + head_st_meta->flags |= + FLAG_INSN_SKIP_PREC_DEPENDENT; } else { head_ld_meta->ldst_gather_len = 0; } @@ -4098,8 +4148,8 @@ static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog) head_ld_meta = meta1; head_st_meta = meta2; } else { - meta1->skip = true; - meta2->skip = true; + meta1->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; + meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT; } head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld); @@ -4124,7 +4174,7 @@ static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog) if (meta->flags & FLAG_INSN_IS_JUMP_DST) cache_avail = false; - if (meta->skip) + if (meta->flags & FLAG_INSN_SKIP_MASK) continue; insn = &meta->insn; @@ -4210,7 +4260,7 @@ start_new: } list_for_each_entry(meta, &nfp_prog->insns, l) { - if (meta->skip) + if (meta->flags & FLAG_INSN_SKIP_MASK) continue; if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) { @@ -4246,7 +4296,8 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) u32 id; nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { - if (meta1->skip || meta2->skip) + if (meta1->flags & FLAG_INSN_SKIP_MASK || + meta2->flags & FLAG_INSN_SKIP_MASK) continue; if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) || @@ -4325,7 +4376,7 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog) return ret; } -void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt) +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta; @@ -4336,7 +4387,7 @@ void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt) unsigned int dst_idx; bool pseudo_call; - if (BPF_CLASS(code) != BPF_JMP) + if (!is_mbpf_jmp(meta)) continue; if (BPF_OP(code) == BPF_EXIT) continue; @@ -4353,7 +4404,7 @@ void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt) else dst_idx = meta->n + 1 + meta->insn.off; - dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx, cnt); + dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx); if (pseudo_call) dst_meta->flags |= FLAG_INSN_IS_SUBPROG_START; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 941277936475..b25a48218bcf 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -243,6 +243,16 @@ struct nfp_bpf_reg_state { #define FLAG_INSN_IS_JUMP_DST BIT(0) #define FLAG_INSN_IS_SUBPROG_START BIT(1) #define FLAG_INSN_PTR_CALLER_STACK_FRAME BIT(2) +/* Instruction is pointless, noop even on its own */ +#define FLAG_INSN_SKIP_NOOP BIT(3) +/* Instruction is optimized out based on preceding instructions */ +#define FLAG_INSN_SKIP_PREC_DEPENDENT BIT(4) +/* Instruction is optimized by the verifier */ +#define FLAG_INSN_SKIP_VERIFIER_OPT BIT(5) + +#define FLAG_INSN_SKIP_MASK (FLAG_INSN_SKIP_NOOP | \ + FLAG_INSN_SKIP_PREC_DEPENDENT | \ + FLAG_INSN_SKIP_VERIFIER_OPT) /** * struct nfp_insn_meta - BPF instruction wrapper @@ -271,7 +281,6 @@ struct nfp_bpf_reg_state { * @n: eBPF instruction number * @flags: eBPF instruction extra optimization flags * @subprog_idx: index of subprogram to which the instruction belongs - * @skip: skip this instruction (optimized out) * @double_cb: callback for second part of the instruction * @l: link on nfp_prog->insns list */ @@ -319,7 +328,6 @@ struct nfp_insn_meta { unsigned short n; unsigned short flags; unsigned short subprog_idx; - bool skip; instr_cb_t double_cb; struct list_head l; @@ -357,6 +365,21 @@ static inline bool is_mbpf_load(const struct nfp_insn_meta *meta) return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM); } +static inline bool is_mbpf_jmp32(const struct nfp_insn_meta *meta) +{ + return mbpf_class(meta) == BPF_JMP32; +} + +static inline bool is_mbpf_jmp64(const struct nfp_insn_meta *meta) +{ + return mbpf_class(meta) == BPF_JMP; +} + +static inline bool is_mbpf_jmp(const struct nfp_insn_meta *meta) +{ + return is_mbpf_jmp32(meta) || is_mbpf_jmp64(meta); +} + static inline bool is_mbpf_store(const struct nfp_insn_meta *meta) { return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM); @@ -407,6 +430,20 @@ static inline bool is_mbpf_div(const struct nfp_insn_meta *meta) return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_DIV; } +static inline bool is_mbpf_cond_jump(const struct nfp_insn_meta *meta) +{ + u8 op; + + if (is_mbpf_jmp32(meta)) + return true; + + if (!is_mbpf_jmp64(meta)) + return false; + + op = mbpf_op(meta); + return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL; +} + static inline bool is_mbpf_helper_call(const struct nfp_insn_meta *meta) { struct bpf_insn insn = meta->insn; @@ -457,6 +494,7 @@ struct nfp_bpf_subprog_info { * @subprog_cnt: number of sub-programs, including main function * @map_records: the map record pointers from bpf->maps_neutral * @subprog: pointer to an array of objects holding info about sub-programs + * @n_insns: number of instructions on @insns list * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) */ struct nfp_prog { @@ -489,6 +527,7 @@ struct nfp_prog { struct nfp_bpf_neutral_map **map_records; struct nfp_bpf_subprog_info *subprog; + unsigned int n_insns; struct list_head insns; }; @@ -505,7 +544,7 @@ struct nfp_bpf_vnic { }; bool nfp_is_subprog_start(struct nfp_insn_meta *meta); -void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt); +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog); int nfp_bpf_jit(struct nfp_prog *prog); bool nfp_bpf_supported_opcode(u8 code); @@ -513,6 +552,10 @@ int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int nfp_bpf_finalize(struct bpf_verifier_env *env); +int nfp_bpf_opt_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn); +int nfp_bpf_opt_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); + extern const struct bpf_prog_offload_ops nfp_bpf_dev_ops; struct netdev_bpf; @@ -526,7 +569,7 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - unsigned int insn_idx, unsigned int n_insns); + unsigned int insn_idx); void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index f0283854fade..55c7dbf8b421 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -163,8 +163,9 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, list_add_tail(&meta->l, &nfp_prog->insns); } + nfp_prog->n_insns = cnt; - nfp_bpf_jit_prepare(nfp_prog, cnt); + nfp_bpf_jit_prepare(nfp_prog); return 0; } @@ -219,6 +220,10 @@ static int nfp_bpf_translate(struct bpf_prog *prog) unsigned int max_instr; int err; + /* We depend on dead code elimination succeeding */ + if (prog->aux->offload->opt_failed) + return -EINVAL; + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); nfp_prog->__prog_alloc_len = max_instr * sizeof(u64); @@ -591,6 +596,8 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, const struct bpf_prog_offload_ops nfp_bpf_dev_ops = { .insn_hook = nfp_verify_insn, .finalize = nfp_bpf_finalize, + .replace_insn = nfp_bpf_opt_replace_insn, + .remove_insns = nfp_bpf_opt_remove_insns, .prepare = nfp_bpf_verifier_prep, .translate = nfp_bpf_translate, .destroy = nfp_bpf_destroy, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 337bb862ec1d..36f56eb4cbe2 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -18,15 +18,15 @@ struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - unsigned int insn_idx, unsigned int n_insns) + unsigned int insn_idx) { unsigned int forward, backward, i; backward = meta->n - insn_idx; forward = insn_idx - meta->n; - if (min(forward, backward) > n_insns - insn_idx - 1) { - backward = n_insns - insn_idx - 1; + if (min(forward, backward) > nfp_prog->n_insns - insn_idx - 1) { + backward = nfp_prog->n_insns - insn_idx - 1; meta = nfp_prog_last_meta(nfp_prog); } if (min(forward, backward) > insn_idx && backward > insn_idx) { @@ -629,7 +629,7 @@ int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv; struct nfp_insn_meta *meta = nfp_prog->verifier_meta; - meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx, env->prog->len); + meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx); nfp_prog->verifier_meta = meta; if (!nfp_bpf_supported_opcode(meta->insn.code)) { @@ -690,8 +690,7 @@ nfp_assign_subprog_idx_and_regs(struct bpf_verifier_env *env, return 0; } -static unsigned int -nfp_bpf_get_stack_usage(struct nfp_prog *nfp_prog, unsigned int cnt) +static unsigned int nfp_bpf_get_stack_usage(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta = nfp_prog_first_meta(nfp_prog); unsigned int max_depth = 0, depth = 0, frame = 0; @@ -726,7 +725,7 @@ continue_subprog: /* Find the callee and start processing it. */ meta = nfp_bpf_goto_meta(nfp_prog, meta, - meta->n + 1 + meta->insn.imm, cnt); + meta->n + 1 + meta->insn.imm); idx = meta->subprog_idx; frame++; goto process_subprog; @@ -778,8 +777,7 @@ int nfp_bpf_finalize(struct bpf_verifier_env *env) nn = netdev_priv(env->prog->aux->offload->netdev); max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; - nfp_prog->stack_size = nfp_bpf_get_stack_usage(nfp_prog, - env->prog->len); + nfp_prog->stack_size = nfp_bpf_get_stack_usage(nfp_prog); if (nfp_prog->stack_size > max_stack) { pr_vlog(env, "stack too large: program %dB > FW stack %dB\n", nfp_prog->stack_size, max_stack); @@ -788,3 +786,61 @@ int nfp_bpf_finalize(struct bpf_verifier_env *env) return 0; } + +int nfp_bpf_opt_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn) +{ + struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv; + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + struct nfp_insn_meta *meta = nfp_prog->verifier_meta; + + meta = nfp_bpf_goto_meta(nfp_prog, meta, aux_data[off].orig_idx); + nfp_prog->verifier_meta = meta; + + /* conditional jump to jump conversion */ + if (is_mbpf_cond_jump(meta) && + insn->code == (BPF_JMP | BPF_JA | BPF_K)) { + unsigned int tgt_off; + + tgt_off = off + insn->off + 1; + + if (!insn->off) { + meta->jmp_dst = list_next_entry(meta, l); + meta->jump_neg_op = false; + } else if (meta->jmp_dst->n != aux_data[tgt_off].orig_idx) { + pr_vlog(env, "branch hard wire at %d changes target %d -> %d\n", + off, meta->jmp_dst->n, + aux_data[tgt_off].orig_idx); + return -EINVAL; + } + return 0; + } + + pr_vlog(env, "unsupported instruction replacement %hhx -> %hhx\n", + meta->insn.code, insn->code); + return -EINVAL; +} + +int nfp_bpf_opt_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) +{ + struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv; + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + struct nfp_insn_meta *meta = nfp_prog->verifier_meta; + unsigned int i; + + meta = nfp_bpf_goto_meta(nfp_prog, meta, aux_data[off].orig_idx); + + for (i = 0; i < cnt; i++) { + if (WARN_ON_ONCE(&meta->l == &nfp_prog->insns)) + return -EINVAL; + + /* doesn't count if it already has the flag */ + if (meta->flags & FLAG_INSN_SKIP_VERIFIER_OPT) + i--; + + meta->flags |= FLAG_INSN_SKIP_VERIFIER_OPT; + meta = list_next_entry(meta, l); + } + + return 0; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 24a90163775e..3b0955d34716 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -554,7 +554,6 @@ struct qed_hwfn { u8 dp_level; char name[NAME_SIZE]; - bool first_on_engine; bool hw_init_done; u8 num_funcs_on_engine; @@ -805,6 +804,9 @@ struct qed_dev { u32 mcp_nvm_resp; + /* Recovery */ + bool recov_in_prog; + /* Linux specific here */ struct qede_dev *edev; struct pci_dev *pdev; @@ -944,6 +946,7 @@ void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt); u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); +void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn); void qed_get_protocol_stats(struct qed_dev *cdev, enum qed_mcp_protocol_type type, union qed_mcp_protocol_stats *stats); diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index c2ad405b2f50..35c9f484eb9f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -2129,6 +2129,7 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks) rdma_tasks); /* no need for break since RoCE coexist with Ethernet */ } + /* fall through */ case QED_PCI_ETH: { struct qed_eth_pf_params *p_params = diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 8f6551421945..e2cbd77646a2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -795,19 +795,19 @@ static void qed_init_qm_pq(struct qed_hwfn *p_hwfn, /* get pq index according to PQ_FLAGS */ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, - u32 pq_flags) + unsigned long pq_flags) { struct qed_qm_info *qm_info = &p_hwfn->qm_info; /* Can't have multiple flags set here */ - if (bitmap_weight((unsigned long *)&pq_flags, + if (bitmap_weight(&pq_flags, sizeof(pq_flags) * BITS_PER_BYTE) > 1) { - DP_ERR(p_hwfn, "requested multiple pq flags 0x%x\n", pq_flags); + DP_ERR(p_hwfn, "requested multiple pq flags 0x%lx\n", pq_flags); goto err; } if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) { - DP_ERR(p_hwfn, "pq flag 0x%x is not set\n", pq_flags); + DP_ERR(p_hwfn, "pq flag 0x%lx is not set\n", pq_flags); goto err; } @@ -1959,11 +1959,6 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, (p_hwfn->hw_info.personality == QED_PCI_FCOE) ? 1 : 0); STORE_RT_REG(p_hwfn, PRS_REG_SEARCH_ROCE_RT_OFFSET, 0); - /* Cleanup chip from previous driver if such remains exist */ - rc = qed_final_cleanup(p_hwfn, p_ptt, rel_pf_id, false); - if (rc) - return rc; - /* Sanity check before the PF init sequence that uses DMAE */ rc = qed_dmae_sanity(p_hwfn, p_ptt, "pf_phase"); if (rc) @@ -2007,17 +2002,15 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, return rc; } -static int qed_change_pci_hwfn(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u8 enable) +int qed_pglueb_set_pfid_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool b_enable) { - u32 delay_idx = 0, val, set_val = enable ? 1 : 0; + u32 delay_idx = 0, val, set_val = b_enable ? 1 : 0; - /* Change PF in PXP */ - qed_wr(p_hwfn, p_ptt, - PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, set_val); + /* Configure the PF's internal FID_enable for master transactions */ + qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER, set_val); - /* wait until value is set - try for 1 second every 50us */ + /* Wait until value is set - try for 1 second every 50us */ for (delay_idx = 0; delay_idx < 20000; delay_idx++) { val = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER); @@ -2071,13 +2064,19 @@ static int qed_vf_start(struct qed_hwfn *p_hwfn, return 0; } +static void qed_pglueb_clear_err(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, + BIT(p_hwfn->abs_pf_id)); +} + int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) { struct qed_load_req_params load_req_params; u32 load_code, resp, param, drv_mb_param; bool b_default_mtu = true; struct qed_hwfn *p_hwfn; - int rc = 0, mfw_rc, i; + int rc = 0, i; u16 ether_type; if ((p_params->int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) { @@ -2092,7 +2091,7 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) } for_each_hwfn(cdev, i) { - struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + p_hwfn = &cdev->hwfns[i]; /* If management didn't provide a default, set one of our own */ if (!p_hwfn->hw_info.mtu) { @@ -2105,9 +2104,6 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) continue; } - /* Enable DMAE in PXP */ - rc = qed_change_pci_hwfn(p_hwfn, p_hwfn->p_main_ptt, true); - rc = qed_calc_hw_mode(p_hwfn); if (rc) return rc; @@ -2144,12 +2140,43 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) "Load request was sent. Load code: 0x%x\n", load_code); + /* Only relevant for recovery: + * Clear the indication after LOAD_REQ is responded by the MFW. + */ + cdev->recov_in_prog = false; + qed_mcp_set_capabilities(p_hwfn, p_hwfn->p_main_ptt); qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt); - p_hwfn->first_on_engine = (load_code == - FW_MSG_CODE_DRV_LOAD_ENGINE); + /* Clean up chip from previous driver if such remains exist. + * This is not needed when the PF is the first one on the + * engine, since afterwards we are going to init the FW. + */ + if (load_code != FW_MSG_CODE_DRV_LOAD_ENGINE) { + rc = qed_final_cleanup(p_hwfn, p_hwfn->p_main_ptt, + p_hwfn->rel_pf_id, false); + if (rc) { + DP_NOTICE(p_hwfn, "Final cleanup failed\n"); + goto load_err; + } + } + + /* Log and clear previous pglue_b errors if such exist */ + qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_main_ptt); + + /* Enable the PF's internal FID_enable in the PXP */ + rc = qed_pglueb_set_pfid_enable(p_hwfn, p_hwfn->p_main_ptt, + true); + if (rc) + goto load_err; + + /* Clear the pglue_b was_error indication. + * In E4 it must be done after the BME and the internal + * FID_enable for the PF are set, since VDMs may cause the + * indication to be set again. + */ + qed_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt); switch (load_code) { case FW_MSG_CODE_DRV_LOAD_ENGINE: @@ -2180,39 +2207,29 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) break; } - if (rc) + if (rc) { DP_NOTICE(p_hwfn, "init phase failed for loadcode 0x%x (rc %d)\n", - load_code, rc); - - /* ACK mfw regardless of success or failure of initialization */ - mfw_rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt, - DRV_MSG_CODE_LOAD_DONE, - 0, &load_code, ¶m); - if (rc) - return rc; - if (mfw_rc) { - DP_NOTICE(p_hwfn, "Failed sending LOAD_DONE command\n"); - return mfw_rc; + load_code, rc); + goto load_err; } - /* Check if there is a DID mismatch between nvm-cfg/efuse */ - if (param & FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR) - DP_NOTICE(p_hwfn, - "warning: device configuration is not supported on this board type. The device may not function as expected.\n"); + rc = qed_mcp_load_done(p_hwfn, p_hwfn->p_main_ptt); + if (rc) + return rc; /* send DCBX attention request command */ DP_VERBOSE(p_hwfn, QED_MSG_DCB, "sending phony dcbx set command to trigger DCBx attention handling\n"); - mfw_rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt, - DRV_MSG_CODE_SET_DCBX, - 1 << DRV_MB_PARAM_DCBX_NOTIFY_SHIFT, - &load_code, ¶m); - if (mfw_rc) { + rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt, + DRV_MSG_CODE_SET_DCBX, + 1 << DRV_MB_PARAM_DCBX_NOTIFY_SHIFT, + &resp, ¶m); + if (rc) { DP_NOTICE(p_hwfn, "Failed to send DCBX attention request\n"); - return mfw_rc; + return rc; } p_hwfn->hw_init_done = true; @@ -2261,6 +2278,12 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) } return 0; + +load_err: + /* The MFW load lock should be released also when initialization fails. + */ + qed_mcp_load_done(p_hwfn, p_hwfn->p_main_ptt); + return rc; } #define QED_HW_STOP_RETRY_LIMIT (10) @@ -2273,6 +2296,9 @@ static void qed_hw_timers_stop(struct qed_dev *cdev, qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_CONN, 0x0); qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_TASK, 0x0); + if (cdev->recov_in_prog) + return; + for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) { if ((!qed_rd(p_hwfn, p_ptt, TM_REG_PF_SCAN_ACTIVE_CONN)) && @@ -2335,12 +2361,14 @@ int qed_hw_stop(struct qed_dev *cdev) p_hwfn->hw_init_done = false; /* Send unload command to MCP */ - rc = qed_mcp_unload_req(p_hwfn, p_ptt); - if (rc) { - DP_NOTICE(p_hwfn, - "Failed sending a UNLOAD_REQ command. rc = %d.\n", - rc); - rc2 = -EINVAL; + if (!cdev->recov_in_prog) { + rc = qed_mcp_unload_req(p_hwfn, p_ptt); + if (rc) { + DP_NOTICE(p_hwfn, + "Failed sending a UNLOAD_REQ command. rc = %d.\n", + rc); + rc2 = -EINVAL; + } } qed_slowpath_irq_sync(p_hwfn); @@ -2382,27 +2410,31 @@ int qed_hw_stop(struct qed_dev *cdev) qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0); qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0); - qed_mcp_unload_done(p_hwfn, p_ptt); - if (rc) { - DP_NOTICE(p_hwfn, - "Failed sending a UNLOAD_DONE command. rc = %d.\n", - rc); - rc2 = -EINVAL; + if (!cdev->recov_in_prog) { + rc = qed_mcp_unload_done(p_hwfn, p_ptt); + if (rc) { + DP_NOTICE(p_hwfn, + "Failed sending a UNLOAD_DONE command. rc = %d.\n", + rc); + rc2 = -EINVAL; + } } } - if (IS_PF(cdev)) { + if (IS_PF(cdev) && !cdev->recov_in_prog) { p_hwfn = QED_LEADING_HWFN(cdev); p_ptt = QED_LEADING_HWFN(cdev)->p_main_ptt; - /* Disable DMAE in PXP - in CMT, this should only be done for - * first hw-function, and only after all transactions have - * stopped for all active hw-functions. + /* Clear the PF's internal FID_enable in the PXP. + * In CMT this should only be done for first hw-function, and + * only after all transactions have stopped for all active + * hw-functions. */ - rc = qed_change_pci_hwfn(p_hwfn, p_ptt, false); + rc = qed_pglueb_set_pfid_enable(p_hwfn, p_ptt, false); if (rc) { DP_NOTICE(p_hwfn, - "qed_change_pci_hwfn failed. rc = %d.\n", rc); + "qed_pglueb_set_pfid_enable() failed. rc = %d.\n", + rc); rc2 = -EINVAL; } } @@ -2502,9 +2534,8 @@ static void qed_hw_hwfn_prepare(struct qed_hwfn *p_hwfn) PGLUE_B_REG_PGL_ADDR_94_F0_BB, 0); } - /* Clean Previous errors if such exist */ - qed_wr(p_hwfn, p_hwfn->p_main_ptt, - PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, 1 << p_hwfn->abs_pf_id); + /* Clean previous pglue_b errors if such exist */ + qed_pglueb_clear_err(p_hwfn, p_hwfn->p_main_ptt); /* enable internal target-read */ qed_wr(p_hwfn, p_hwfn->p_main_ptt, @@ -3440,6 +3471,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, void __iomem *p_doorbells, enum qed_pci_personality personality) { + struct qed_dev *cdev = p_hwfn->cdev; int rc = 0; /* Split PCI bars evenly between hwfns */ @@ -3492,7 +3524,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, /* Sending a mailbox to the MFW should be done after qed_get_hw_info() * is called as it sets the ports number in an engine. */ - if (IS_LEAD_HWFN(p_hwfn)) { + if (IS_LEAD_HWFN(p_hwfn) && !cdev->recov_in_prog) { rc = qed_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt); if (rc) DP_NOTICE(p_hwfn, "Failed to initiate PF FLR\n"); diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index acccd85170aa..e4b4e3b78e8a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -472,6 +472,18 @@ int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *coal, void *handle); int qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle); +/** + * @brief qed_pglueb_set_pfid_enable - Enable or disable PCI BUS MASTER + * + * @param p_hwfn + * @param p_ptt + * @param b_enable - true/false + * + * @return int + */ +int qed_pglueb_set_pfid_enable(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool b_enable); + /** * @brief db_recovery_add - add doorbell information to the doorbell * recovery mechanism. diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index b13cfb449d8f..417121e74ee9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -12827,7 +12827,7 @@ enum MFW_DRV_MSG_TYPE { MFW_DRV_MSG_LLDP_DATA_UPDATED, MFW_DRV_MSG_DCBX_REMOTE_MIB_UPDATED, MFW_DRV_MSG_DCBX_OPERATIONAL_MIB_UPDATED, - MFW_DRV_MSG_RESERVED4, + MFW_DRV_MSG_ERROR_RECOVERY, MFW_DRV_MSG_BW_UPDATE, MFW_DRV_MSG_S_TAG_UPDATE, MFW_DRV_MSG_GET_LAN_STATS, diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index 70504dcf4087..72ec1c6bdf70 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -703,6 +703,17 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn, int qed_status = 0; u32 offset = 0; + if (p_hwfn->cdev->recov_in_prog) { + DP_VERBOSE(p_hwfn, + NETIF_MSG_HW, + "Recovery is in progress. Avoid DMAE transaction [{src: addr 0x%llx, type %d}, {dst: addr 0x%llx, type %d}, size %d].\n", + src_addr, src_type, dst_addr, dst_type, + size_in_dwords); + + /* Let the flow complete w/o any error handling */ + return 0; + } + qed_dmae_opcode(p_hwfn, (src_type == QED_DMAE_ADDRESS_GRC), (dst_type == QED_DMAE_ADDRESS_GRC), diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 92340919d852..e23980e301b6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -255,112 +255,114 @@ out: #define PGLUE_ATTENTION_ICPL_VALID (1 << 23) #define PGLUE_ATTENTION_ZLR_VALID (1 << 25) #define PGLUE_ATTENTION_ILT_VALID (1 << 23) -static int qed_pglub_rbc_attn_cb(struct qed_hwfn *p_hwfn) + +int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) { u32 tmp; - tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, - PGLUE_B_REG_TX_ERR_WR_DETAILS2); + tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS2); if (tmp & PGLUE_ATTENTION_VALID) { u32 addr_lo, addr_hi, details; - addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + addr_lo = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_ADD_31_0); - addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + addr_hi = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_ADD_63_32); - details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + details = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS); - DP_INFO(p_hwfn, - "Illegal write by chip to [%08x:%08x] blocked.\n" - "Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n" - "Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n", - addr_hi, addr_lo, details, - (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID), - (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID), - GET_FIELD(details, - PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0, - tmp, - GET_FIELD(tmp, - PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0, - GET_FIELD(tmp, - PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0, - GET_FIELD(tmp, - PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0); + DP_NOTICE(p_hwfn, + "Illegal write by chip to [%08x:%08x] blocked.\n" + "Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n" + "Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n", + addr_hi, addr_lo, details, + (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID), + (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID), + GET_FIELD(details, + PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0, + tmp, + GET_FIELD(tmp, + PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0, + GET_FIELD(tmp, + PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0, + GET_FIELD(tmp, + PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0); } - tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, - PGLUE_B_REG_TX_ERR_RD_DETAILS2); + tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_RD_DETAILS2); if (tmp & PGLUE_ATTENTION_RD_VALID) { u32 addr_lo, addr_hi, details; - addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + addr_lo = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_RD_ADD_31_0); - addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + addr_hi = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_RD_ADD_63_32); - details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + details = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_RD_DETAILS); - DP_INFO(p_hwfn, - "Illegal read by chip from [%08x:%08x] blocked.\n" - " Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n" - " Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n", - addr_hi, addr_lo, details, - (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID), - (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID), - GET_FIELD(details, - PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0, - tmp, - GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 - : 0, - GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0, - GET_FIELD(tmp, PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 - : 0); + DP_NOTICE(p_hwfn, + "Illegal read by chip from [%08x:%08x] blocked.\n" + "Details: %08x [PFID %02x, VFID %02x, VF_VALID %02x]\n" + "Details2 %08x [Was_error %02x BME deassert %02x FID_enable deassert %02x]\n", + addr_hi, addr_lo, details, + (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_PFID), + (u8)GET_FIELD(details, PGLUE_ATTENTION_DETAILS_VFID), + GET_FIELD(details, + PGLUE_ATTENTION_DETAILS_VF_VALID) ? 1 : 0, + tmp, + GET_FIELD(tmp, + PGLUE_ATTENTION_DETAILS2_WAS_ERR) ? 1 : 0, + GET_FIELD(tmp, + PGLUE_ATTENTION_DETAILS2_BME) ? 1 : 0, + GET_FIELD(tmp, + PGLUE_ATTENTION_DETAILS2_FID_EN) ? 1 : 0); } - tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, - PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL); + tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_TX_ERR_WR_DETAILS_ICPL); if (tmp & PGLUE_ATTENTION_ICPL_VALID) - DP_INFO(p_hwfn, "ICPL error - %08x\n", tmp); + DP_NOTICE(p_hwfn, "ICPL error - %08x\n", tmp); - tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, - PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS); + tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_MASTER_ZLR_ERR_DETAILS); if (tmp & PGLUE_ATTENTION_ZLR_VALID) { u32 addr_hi, addr_lo; - addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + addr_lo = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_MASTER_ZLR_ERR_ADD_31_0); - addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + addr_hi = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_MASTER_ZLR_ERR_ADD_63_32); - DP_INFO(p_hwfn, "ZLR eror - %08x [Address %08x:%08x]\n", - tmp, addr_hi, addr_lo); + DP_NOTICE(p_hwfn, "ZLR error - %08x [Address %08x:%08x]\n", + tmp, addr_hi, addr_lo); } - tmp = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, - PGLUE_B_REG_VF_ILT_ERR_DETAILS2); + tmp = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_VF_ILT_ERR_DETAILS2); if (tmp & PGLUE_ATTENTION_ILT_VALID) { u32 addr_hi, addr_lo, details; - addr_lo = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + addr_lo = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_VF_ILT_ERR_ADD_31_0); - addr_hi = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + addr_hi = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_VF_ILT_ERR_ADD_63_32); - details = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, + details = qed_rd(p_hwfn, p_ptt, PGLUE_B_REG_VF_ILT_ERR_DETAILS); - DP_INFO(p_hwfn, - "ILT error - Details %08x Details2 %08x [Address %08x:%08x]\n", - details, tmp, addr_hi, addr_lo); + DP_NOTICE(p_hwfn, + "ILT error - Details %08x Details2 %08x [Address %08x:%08x]\n", + details, tmp, addr_hi, addr_lo); } /* Clear the indications */ - qed_wr(p_hwfn, p_hwfn->p_dpc_ptt, - PGLUE_B_REG_LATCHED_ERRORS_CLR, (1 << 2)); + qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_LATCHED_ERRORS_CLR, BIT(2)); return 0; } +static int qed_pglueb_rbc_attn_cb(struct qed_hwfn *p_hwfn) +{ + return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt); +} + #define QED_DORQ_ATTENTION_REASON_MASK (0xfffff) #define QED_DORQ_ATTENTION_OPAQUE_MASK (0xffff) #define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0) @@ -540,7 +542,7 @@ static struct aeu_invert_reg aeu_descs[NUM_ATTN_REGS] = { {"PGLUE misc_flr", ATTENTION_SINGLE, NULL, MAX_BLOCK_ID}, {"PGLUE B RBC", ATTENTION_PAR_INT, - qed_pglub_rbc_attn_cb, BLOCK_PGLUE_B}, + qed_pglueb_rbc_attn_cb, BLOCK_PGLUE_B}, {"PGLUE misc_mctp", ATTENTION_SINGLE, NULL, MAX_BLOCK_ID}, {"Flash event", ATTENTION_SINGLE, NULL, MAX_BLOCK_ID}, diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h index d81a62ebd524..1f356ed4f761 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.h +++ b/drivers/net/ethernet/qlogic/qed/qed_int.h @@ -431,4 +431,7 @@ int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, #define QED_MAPPING_MEMORY_SIZE(dev) (NUM_OF_SBS(dev)) +int qed_pglueb_rbc_attn_handler(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt); + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 67c02ea93906..e68ca83ae915 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -609,6 +609,10 @@ qed_sp_update_accept_mode(struct qed_hwfn *p_hwfn, (!!(accept_filter & QED_ACCEPT_MCAST_MATCHED) && !!(accept_filter & QED_ACCEPT_MCAST_UNMATCHED))); + SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL, + (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) && + !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED))); + SET_FIELD(state, ETH_VPORT_TX_MODE_BCAST_ACCEPT_ALL, !!(accept_filter & QED_ACCEPT_BCAST)); @@ -744,6 +748,11 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn, return rc; } + if (p_params->update_ctl_frame_check) { + p_cmn->ctl_frame_mac_check_en = p_params->mac_chk_en; + p_cmn->ctl_frame_ethtype_check_en = p_params->ethtype_chk_en; + } + /* Update mcast bins for VFs, PF doesn't use this functionality */ qed_sp_update_mcast_bin(p_hwfn, p_ramrod, p_params); @@ -2688,7 +2697,8 @@ static int qed_configure_filter_rx_mode(struct qed_dev *cdev, if (type == QED_FILTER_RX_MODE_TYPE_PROMISC) { accept_flags.rx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED | QED_ACCEPT_MCAST_UNMATCHED; - accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; + accept_flags.tx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED | + QED_ACCEPT_MCAST_UNMATCHED; } else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC) { accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index 8d80f1095d17..7127d5aaac42 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -219,6 +219,9 @@ struct qed_sp_vport_update_params { struct qed_rss_params *rss_params; struct qed_filter_accept_flags accept_flags; struct qed_sge_tpa_params *sge_tpa_params; + u8 update_ctl_frame_check; + u8 mac_chk_en; + u8 ethtype_chk_en; }; int qed_sp_vport_update(struct qed_hwfn *p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index d9237c65a838..b5f419b71287 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -2451,19 +2451,24 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, { struct qed_ll2_tx_pkt_info pkt; const skb_frag_t *frag; + u8 flags = 0, nr_frags; int rc = -EINVAL, i; dma_addr_t mapping; u16 vlan = 0; - u8 flags = 0; if (unlikely(skb->ip_summed != CHECKSUM_NONE)) { DP_INFO(cdev, "Cannot transmit a checksummed packet\n"); return -EINVAL; } - if (1 + skb_shinfo(skb)->nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { + /* Cache number of fragments from SKB since SKB may be freed by + * the completion routine after calling qed_ll2_prepare_tx_packet() + */ + nr_frags = skb_shinfo(skb)->nr_frags; + + if (1 + nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n", - 1 + skb_shinfo(skb)->nr_frags); + 1 + nr_frags); return -EINVAL; } @@ -2485,7 +2490,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, } memset(&pkt, 0, sizeof(pkt)); - pkt.num_of_bds = 1 + skb_shinfo(skb)->nr_frags; + pkt.num_of_bds = 1 + nr_frags; pkt.vlan = vlan; pkt.bd_flags = flags; pkt.tx_dest = QED_LL2_TX_DEST_NW; @@ -2496,12 +2501,17 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, test_bit(QED_LL2_XMIT_FLAGS_FIP_DISCOVERY, &xmit_flags)) pkt.remove_stag = true; + /* qed_ll2_prepare_tx_packet() may actually send the packet if + * there are no fragments in the skb and subsequently the completion + * routine may run and free the SKB, so no dereferencing the SKB + * beyond this point unless skb has any fragments. + */ rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle, &pkt, 1); if (rc) goto err; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + for (i = 0; i < nr_frags; i++) { frag = &skb_shinfo(skb)->frags[i]; mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0, diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 6adf5bda9811..b47352643fb5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -359,6 +359,8 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev, qed_init_dp(cdev, params->dp_module, params->dp_level); + cdev->recov_in_prog = params->recov_in_prog; + rc = qed_init_pci(cdev, pdev); if (rc) { DP_ERR(cdev, "init pci failed\n"); @@ -2203,6 +2205,15 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type, return qed_mcp_get_nvm_image(hwfn, type, buf, len); } +void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn) +{ + struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common; + void *cookie = p_hwfn->cdev->ops_cookie; + + if (ops && ops->schedule_recovery_handler) + ops->schedule_recovery_handler(cookie); +} + static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, void *handle) { @@ -2226,6 +2237,23 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) return status; } +static int qed_recovery_process(struct qed_dev *cdev) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt; + int rc = 0; + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EAGAIN; + + rc = qed_start_recovery_process(p_hwfn, p_ptt); + + qed_ptt_release(p_hwfn, p_ptt); + + return rc; +} + static int qed_update_wol(struct qed_dev *cdev, bool enabled) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); @@ -2380,6 +2408,8 @@ const struct qed_common_ops qed_common_ops_pass = { .nvm_get_image = &qed_nvm_get_image, .set_coalesce = &qed_set_coalesce, .set_led = &qed_set_led, + .recovery_process = &qed_recovery_process, + .recovery_prolog = &qed_recovery_prolog, .update_drv_state = &qed_update_drv_state, .update_mac = &qed_update_mac, .update_mtu = &qed_update_mtu, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index e7f18e34ff0d..bb8541847aa5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1070,6 +1070,27 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn, return 0; } +int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 resp = 0, param = 0; + int rc; + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_LOAD_DONE, 0, &resp, + ¶m); + if (rc) { + DP_NOTICE(p_hwfn, + "Failed to send a LOAD_DONE command, rc = %d\n", rc); + return rc; + } + + /* Check if there is a DID mismatch between nvm-cfg/efuse */ + if (param & FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR) + DP_NOTICE(p_hwfn, + "warning: device configuration is not supported on this board type. The device may not function as expected.\n"); + + return 0; +} + int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_mb_params mb_params; @@ -1528,6 +1549,60 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up) return 0; } +u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + u32 path_offsize_addr, path_offsize, path_addr, proc_kill_cnt; + + if (IS_VF(p_hwfn->cdev)) + return -EINVAL; + + path_offsize_addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base, + PUBLIC_PATH); + path_offsize = qed_rd(p_hwfn, p_ptt, path_offsize_addr); + path_addr = SECTION_ADDR(path_offsize, QED_PATH_ID(p_hwfn)); + + proc_kill_cnt = qed_rd(p_hwfn, p_ptt, + path_addr + + offsetof(struct public_path, process_kill)) & + PROCESS_KILL_COUNTER_MASK; + + return proc_kill_cnt; +} + +static void qed_mcp_handle_process_kill(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct qed_dev *cdev = p_hwfn->cdev; + u32 proc_kill_cnt; + + /* Prevent possible attentions/interrupts during the recovery handling + * and till its load phase, during which they will be re-enabled. + */ + qed_int_igu_disable_int(p_hwfn, p_ptt); + + DP_NOTICE(p_hwfn, "Received a process kill indication\n"); + + /* The following operations should be done once, and thus in CMT mode + * are carried out by only the first HW function. + */ + if (p_hwfn != QED_LEADING_HWFN(cdev)) + return; + + if (cdev->recov_in_prog) { + DP_NOTICE(p_hwfn, + "Ignoring the indication since a recovery process is already in progress\n"); + return; + } + + cdev->recov_in_prog = true; + + proc_kill_cnt = qed_get_process_kill_counter(p_hwfn, p_ptt); + DP_NOTICE(p_hwfn, "Process kill counter: %d\n", proc_kill_cnt); + + qed_schedule_recovery_handler(p_hwfn); +} + static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, enum MFW_DRV_MSG_TYPE type) @@ -1758,6 +1833,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn, case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE: qed_mcp_handle_transceiver_change(p_hwfn, p_ptt); break; + case MFW_DRV_MSG_ERROR_RECOVERY: + qed_mcp_handle_process_kill(p_hwfn, p_ptt); + break; case MFW_DRV_MSG_GET_LAN_STATS: case MFW_DRV_MSG_GET_FCOE_STATS: case MFW_DRV_MSG_GET_ISCSI_STATS: @@ -2303,6 +2381,43 @@ int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn, return 0; } +int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + struct qed_dev *cdev = p_hwfn->cdev; + + if (cdev->recov_in_prog) { + DP_NOTICE(p_hwfn, + "Avoid triggering a recovery since such a process is already in progress\n"); + return -EAGAIN; + } + + DP_NOTICE(p_hwfn, "Triggering a recovery process\n"); + qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_GENERAL_ATTN_35, 0x1); + + return 0; +} + +#define QED_RECOVERY_PROLOG_SLEEP_MS 100 + +int qed_recovery_prolog(struct qed_dev *cdev) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt = p_hwfn->p_main_ptt; + int rc; + + /* Allow ongoing PCIe transactions to complete */ + msleep(QED_RECOVERY_PROLOG_SLEEP_MS); + + /* Clear the PF's internal FID_enable in the PXP */ + rc = qed_pglueb_set_pfid_enable(p_hwfn, p_ptt, false); + if (rc) + DP_NOTICE(p_hwfn, + "qed_pglueb_set_pfid_enable() failed. rc = %d.\n", + rc); + + return rc; +} + static int qed_mcp_config_vf_msix_bb(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u8 vf_id, u8 num) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index eddf67798d6f..6e1d72a669ae 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -440,6 +440,38 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_mcp_drv_version *p_ver); +/** + * @brief Read the MFW process kill counter + * + * @param p_hwfn + * @param p_ptt + * + * @return u32 + */ +u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt); + +/** + * @brief Trigger a recovery process + * + * @param p_hwfn + * @param p_ptt + * + * @return int + */ +int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + +/** + * @brief A recovery handler must call this function as its first step. + * It is assumed that the handler is not run from an interrupt context. + * + * @param cdev + * @param p_ptt + * + * @return int + */ +int qed_recovery_prolog(struct qed_dev *cdev); + /** * @brief Notify MFW about the change in base device properties * @@ -800,6 +832,16 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_load_req_params *p_params); +/** + * @brief Sends a LOAD_DONE message to the MFW + * + * @param p_hwfn + * @param p_ptt + * + * @return int - 0 - Operation was successful. + */ +int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + /** * @brief Sends a UNLOAD_REQ message to the MFW * diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index 8939ed6e08b7..5ce825ca5f24 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -518,6 +518,8 @@ 0x180824UL #define MISC_REG_AEU_GENERAL_ATTN_0 \ 0x008400UL +#define MISC_REG_AEU_GENERAL_ATTN_35 \ + 0x00848cUL #define CAU_REG_SB_ADDR_MEMORY \ 0x1c8000UL #define CAU_REG_SB_VAR_MEMORY \ diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index eb88bbc6b193..3e0f7c46bb1b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -790,6 +790,17 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) SPQ_HIGH_PRI_RESERVE_DEFAULT); } +static void qed_spq_recov_set_ret_code(struct qed_spq_entry *p_ent, + u8 *fw_return_code) +{ + if (!fw_return_code) + return; + + if (p_ent->elem.hdr.protocol_id == PROTOCOLID_ROCE || + p_ent->elem.hdr.protocol_id == PROTOCOLID_IWARP) + *fw_return_code = RDMA_RETURN_OK; +} + /* Avoid overriding of SPQ entries when getting out-of-order completions, by * marking the completions in a bitmap and increasing the chain consumer only * for the first successive completed entries. @@ -825,6 +836,17 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, return -EINVAL; } + if (p_hwfn->cdev->recov_in_prog) { + DP_VERBOSE(p_hwfn, + QED_MSG_SPQ, + "Recovery is in progress. Skip spq post [cmd %02x protocol %02x]\n", + p_ent->elem.hdr.cmd_id, p_ent->elem.hdr.protocol_id); + + /* Let the flow complete w/o any error handling */ + qed_spq_recov_set_ret_code(p_ent, fw_return_code); + return 0; + } + /* Complete the entry */ rc = qed_spq_fill_entry(p_hwfn, p_ent); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index ca6290fa0f30..9faaa6df78ed 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -1969,7 +1969,9 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn, params.vport_id = vf->vport_id; params.max_buffers_per_cqe = start->max_buffers_per_cqe; params.mtu = vf->mtu; - params.check_mac = true; + + /* Non trusted VFs should enable control frame filtering */ + params.check_mac = !vf->p_vf_info.is_trusted_configured; rc = qed_sp_eth_vport_start(p_hwfn, ¶ms); if (rc) { @@ -4447,6 +4449,13 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled) if (cdev->p_iov_info && cdev->p_iov_info->num_vfs && pci_enabled) pci_disable_sriov(cdev->pdev); + if (cdev->recov_in_prog) { + DP_VERBOSE(cdev, + QED_MSG_IOV, + "Skip SRIOV disable operations in the device since a recovery is in progress\n"); + goto out; + } + for_each_hwfn(cdev, i) { struct qed_hwfn *hwfn = &cdev->hwfns[i]; struct qed_ptt *ptt = qed_ptt_acquire(hwfn); @@ -4486,7 +4495,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled) qed_ptt_release(hwfn, ptt); } - +out: qed_iov_set_vfs_to_disable(cdev, false); return 0; @@ -5130,6 +5139,9 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) params.opaque_fid = vf->opaque_fid; params.vport_id = vf->vport_id; + params.update_ctl_frame_check = 1; + params.mac_chk_en = !vf_info->is_trusted_configured; + if (vf_info->rx_accept_mode & mask) { flags->update_rx_mode_config = 1; flags->rx_accept_filter = vf_info->rx_accept_mode; @@ -5147,7 +5159,8 @@ static void qed_iov_handle_trust_change(struct qed_hwfn *hwfn) } if (flags->update_rx_mode_config || - flags->update_tx_mode_config) + flags->update_tx_mode_config || + params.update_ctl_frame_check) qed_sp_vport_update(hwfn, ¶ms, QED_SPQ_MODE_EBLOCK, NULL); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index b6cccf44bf40..5dda547772c1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -261,6 +261,7 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) struct pfvf_acquire_resp_tlv *resp = &p_iov->pf2vf_reply->acquire_resp; struct pf_vf_pfdev_info *pfdev_info = &resp->pfdev_info; struct vf_pf_resc_request *p_resc; + u8 retry_cnt = VF_ACQUIRE_THRESH; bool resources_acquired = false; struct vfpf_acquire_tlv *req; int rc = 0, attempts = 0; @@ -314,6 +315,15 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) /* send acquire request */ rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); + + /* Re-try acquire in case of vf-pf hw channel timeout */ + if (retry_cnt && rc == -EBUSY) { + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF retrying to acquire due to VPC timeout\n"); + retry_cnt--; + continue; + } + if (rc) goto exit; diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 613249d1e967..843416404aeb 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -162,6 +162,7 @@ struct qede_rdma_dev { struct list_head entry; struct list_head rdma_event_list; struct workqueue_struct *rdma_wq; + bool exp_recovery; }; struct qede_ptp; @@ -264,6 +265,7 @@ struct qede_dev { enum QEDE_STATE { QEDE_STATE_CLOSED, QEDE_STATE_OPEN, + QEDE_STATE_RECOVERY, }; #define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo)) @@ -462,6 +464,7 @@ struct qede_fastpath { #define QEDE_CSUM_UNNECESSARY BIT(1) #define QEDE_TUNN_CSUM_UNNECESSARY BIT(2) +#define QEDE_SP_RECOVERY 0 #define QEDE_SP_RX_MODE 1 #ifdef CONFIG_RFS_ACCEL diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5a74fcbdbc2b..6b4d96635238 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -133,23 +133,12 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id); static void qede_remove(struct pci_dev *pdev); static void qede_shutdown(struct pci_dev *pdev); static void qede_link_update(void *dev, struct qed_link_output *link); +static void qede_schedule_recovery_handler(void *dev); +static void qede_recovery_handler(struct qede_dev *edev); static void qede_get_eth_tlv_data(void *edev, void *data); static void qede_get_generic_tlv_data(void *edev, struct qed_generic_tlvs *data); -/* The qede lock is used to protect driver state change and driver flows that - * are not reentrant. - */ -void __qede_lock(struct qede_dev *edev) -{ - mutex_lock(&edev->qede_lock); -} - -void __qede_unlock(struct qede_dev *edev) -{ - mutex_unlock(&edev->qede_lock); -} - #ifdef CONFIG_QED_SRIOV static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos, __be16 vlan_proto) @@ -231,6 +220,7 @@ static struct qed_eth_cb_ops qede_ll_ops = { .arfs_filter_op = qede_arfs_filter_op, #endif .link_update = qede_link_update, + .schedule_recovery_handler = qede_schedule_recovery_handler, .get_generic_tlv_data = qede_get_generic_tlv_data, .get_protocol_tlv_data = qede_get_eth_tlv_data, }, @@ -950,11 +940,57 @@ err: return -ENOMEM; } +/* The qede lock is used to protect driver state change and driver flows that + * are not reentrant. + */ +void __qede_lock(struct qede_dev *edev) +{ + mutex_lock(&edev->qede_lock); +} + +void __qede_unlock(struct qede_dev *edev) +{ + mutex_unlock(&edev->qede_lock); +} + +/* This version of the lock should be used when acquiring the RTNL lock is also + * needed in addition to the internal qede lock. + */ +void qede_lock(struct qede_dev *edev) +{ + rtnl_lock(); + __qede_lock(edev); +} + +void qede_unlock(struct qede_dev *edev) +{ + __qede_unlock(edev); + rtnl_unlock(); +} + static void qede_sp_task(struct work_struct *work) { struct qede_dev *edev = container_of(work, struct qede_dev, sp_task.work); + /* The locking scheme depends on the specific flag: + * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to + * ensure that ongoing flows are ended and new ones are not started. + * In other cases - only the internal qede lock should be acquired. + */ + + if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) { +#ifdef CONFIG_QED_SRIOV + /* SRIOV must be disabled outside the lock to avoid a deadlock. + * The recovery of the active VFs is currently not supported. + */ + qede_sriov_configure(edev->pdev, 0); +#endif + qede_lock(edev); + qede_recovery_handler(edev); + qede_unlock(edev); + } + __qede_lock(edev); if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags)) @@ -1031,6 +1067,7 @@ static void qede_log_probe(struct qede_dev *edev) enum qede_probe_mode { QEDE_PROBE_NORMAL, + QEDE_PROBE_RECOVERY, }; static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, @@ -1051,6 +1088,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, probe_params.dp_module = dp_module; probe_params.dp_level = dp_level; probe_params.is_vf = is_vf; + probe_params.recov_in_prog = (mode == QEDE_PROBE_RECOVERY); cdev = qed_ops->common->probe(pdev, &probe_params); if (!cdev) { rc = -ENODEV; @@ -1078,11 +1116,20 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, if (rc) goto err2; - edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module, - dp_level); - if (!edev) { - rc = -ENOMEM; - goto err2; + if (mode != QEDE_PROBE_RECOVERY) { + edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module, + dp_level); + if (!edev) { + rc = -ENOMEM; + goto err2; + } + } else { + struct net_device *ndev = pci_get_drvdata(pdev); + + edev = netdev_priv(ndev); + edev->cdev = cdev; + memset(&edev->stats, 0, sizeof(edev->stats)); + memcpy(&edev->dev_info, &dev_info, sizeof(dev_info)); } if (is_vf) @@ -1090,28 +1137,31 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, qede_init_ndev(edev); - rc = qede_rdma_dev_add(edev); + rc = qede_rdma_dev_add(edev, (mode == QEDE_PROBE_RECOVERY)); if (rc) goto err3; - /* Prepare the lock prior to the registration of the netdev, - * as once it's registered we might reach flows requiring it - * [it's even possible to reach a flow needing it directly - * from there, although it's unlikely]. - */ - INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task); - mutex_init(&edev->qede_lock); - rc = register_netdev(edev->ndev); - if (rc) { - DP_NOTICE(edev, "Cannot register net-device\n"); - goto err4; + if (mode != QEDE_PROBE_RECOVERY) { + /* Prepare the lock prior to the registration of the netdev, + * as once it's registered we might reach flows requiring it + * [it's even possible to reach a flow needing it directly + * from there, although it's unlikely]. + */ + INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task); + mutex_init(&edev->qede_lock); + + rc = register_netdev(edev->ndev); + if (rc) { + DP_NOTICE(edev, "Cannot register net-device\n"); + goto err4; + } } edev->ops->common->set_name(cdev, edev->ndev->name); /* PTP not supported on VFs */ if (!is_vf) - qede_ptp_enable(edev, true); + qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL)); edev->ops->register_ops(cdev, &qede_ll_ops, edev); @@ -1126,7 +1176,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, return 0; err4: - qede_rdma_dev_remove(edev); + qede_rdma_dev_remove(edev, (mode == QEDE_PROBE_RECOVERY)); err3: free_netdev(edev->ndev); err2: @@ -1162,6 +1212,7 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id) enum qede_remove_mode { QEDE_REMOVE_NORMAL, + QEDE_REMOVE_RECOVERY, }; static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) @@ -1172,16 +1223,20 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) DP_INFO(edev, "Starting qede_remove\n"); - qede_rdma_dev_remove(edev); - unregister_netdev(ndev); - cancel_delayed_work_sync(&edev->sp_task); + qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY)); + + if (mode != QEDE_REMOVE_RECOVERY) { + unregister_netdev(ndev); + + cancel_delayed_work_sync(&edev->sp_task); + + edev->ops->common->set_power_state(cdev, PCI_D0); + + pci_set_drvdata(pdev, NULL); + } qede_ptp_disable(edev); - edev->ops->common->set_power_state(cdev, PCI_D0); - - pci_set_drvdata(pdev, NULL); - /* Use global ops since we've freed edev */ qed_ops->common->slowpath_stop(cdev); if (system_state == SYSTEM_POWER_OFF) @@ -1194,7 +1249,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) * [e.g., QED register callbacks] won't break anything when * accessing the netdevice. */ - free_netdev(ndev); + if (mode != QEDE_REMOVE_RECOVERY) + free_netdev(ndev); dev_info(&pdev->dev, "Ending qede_remove successfully\n"); } @@ -1539,6 +1595,58 @@ static int qede_alloc_mem_load(struct qede_dev *edev) return 0; } +static void qede_empty_tx_queue(struct qede_dev *edev, + struct qede_tx_queue *txq) +{ + unsigned int pkts_compl = 0, bytes_compl = 0; + struct netdev_queue *netdev_txq; + int rc, len = 0; + + netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id); + + while (qed_chain_get_cons_idx(&txq->tx_pbl) != + qed_chain_get_prod_idx(&txq->tx_pbl)) { + DP_VERBOSE(edev, NETIF_MSG_IFDOWN, + "Freeing a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n", + txq->index, qed_chain_get_cons_idx(&txq->tx_pbl), + qed_chain_get_prod_idx(&txq->tx_pbl)); + + rc = qede_free_tx_pkt(edev, txq, &len); + if (rc) { + DP_NOTICE(edev, + "Failed to free a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n", + txq->index, + qed_chain_get_cons_idx(&txq->tx_pbl), + qed_chain_get_prod_idx(&txq->tx_pbl)); + break; + } + + bytes_compl += len; + pkts_compl++; + txq->sw_tx_cons++; + } + + netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl); +} + +static void qede_empty_tx_queues(struct qede_dev *edev) +{ + int i; + + for_each_queue(i) + if (edev->fp_array[i].type & QEDE_FASTPATH_TX) { + int cos; + + for_each_cos_in_txq(edev, cos) { + struct qede_fastpath *fp; + + fp = &edev->fp_array[i]; + qede_empty_tx_queue(edev, + &fp->txq[cos]); + } + } +} + /* This function inits fp content and resets the SB, RXQ and TXQ structures */ static void qede_init_fp(struct qede_dev *edev) { @@ -2053,6 +2161,7 @@ out: enum qede_unload_mode { QEDE_UNLOAD_NORMAL, + QEDE_UNLOAD_RECOVERY, }; static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, @@ -2068,7 +2177,8 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags); - edev->state = QEDE_STATE_CLOSED; + if (mode != QEDE_UNLOAD_RECOVERY) + edev->state = QEDE_STATE_CLOSED; qede_rdma_dev_event_close(edev); @@ -2076,17 +2186,20 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, netif_tx_disable(edev->ndev); netif_carrier_off(edev->ndev); - /* Reset the link */ - memset(&link_params, 0, sizeof(link_params)); - link_params.link_up = false; - edev->ops->common->set_link(edev->cdev, &link_params); - rc = qede_stop_queues(edev); - if (rc) { - qede_sync_free_irqs(edev); - goto out; - } + if (mode != QEDE_UNLOAD_RECOVERY) { + /* Reset the link */ + memset(&link_params, 0, sizeof(link_params)); + link_params.link_up = false; + edev->ops->common->set_link(edev->cdev, &link_params); - DP_INFO(edev, "Stopped Queues\n"); + rc = qede_stop_queues(edev); + if (rc) { + qede_sync_free_irqs(edev); + goto out; + } + + DP_INFO(edev, "Stopped Queues\n"); + } qede_vlan_mark_nonconfigured(edev); edev->ops->fastpath_stop(edev->cdev); @@ -2102,18 +2215,26 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, qede_napi_disable_remove(edev); + if (mode == QEDE_UNLOAD_RECOVERY) + qede_empty_tx_queues(edev); + qede_free_mem_load(edev); qede_free_fp_array(edev); out: if (!is_locked) __qede_unlock(edev); + + if (mode != QEDE_UNLOAD_RECOVERY) + DP_NOTICE(edev, "Link is down\n"); + DP_INFO(edev, "Ending qede unload\n"); } enum qede_load_mode { QEDE_LOAD_NORMAL, QEDE_LOAD_RELOAD, + QEDE_LOAD_RECOVERY, }; static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, @@ -2293,6 +2414,77 @@ static void qede_link_update(void *dev, struct qed_link_output *link) } } +static void qede_schedule_recovery_handler(void *dev) +{ + struct qede_dev *edev = dev; + + if (edev->state == QEDE_STATE_RECOVERY) { + DP_NOTICE(edev, + "Avoid scheduling a recovery handling since already in recovery state\n"); + return; + } + + set_bit(QEDE_SP_RECOVERY, &edev->sp_flags); + schedule_delayed_work(&edev->sp_task, 0); + + DP_INFO(edev, "Scheduled a recovery handler\n"); +} + +static void qede_recovery_failed(struct qede_dev *edev) +{ + netdev_err(edev->ndev, "Recovery handling has failed. Power cycle is needed.\n"); + + netif_device_detach(edev->ndev); + + if (edev->cdev) + edev->ops->common->set_power_state(edev->cdev, PCI_D3hot); +} + +static void qede_recovery_handler(struct qede_dev *edev) +{ + u32 curr_state = edev->state; + int rc; + + DP_NOTICE(edev, "Starting a recovery process\n"); + + /* No need to acquire first the qede_lock since is done by qede_sp_task + * before calling this function. + */ + edev->state = QEDE_STATE_RECOVERY; + + edev->ops->common->recovery_prolog(edev->cdev); + + if (curr_state == QEDE_STATE_OPEN) + qede_unload(edev, QEDE_UNLOAD_RECOVERY, true); + + __qede_remove(edev->pdev, QEDE_REMOVE_RECOVERY); + + rc = __qede_probe(edev->pdev, edev->dp_module, edev->dp_level, + IS_VF(edev), QEDE_PROBE_RECOVERY); + if (rc) { + edev->cdev = NULL; + goto err; + } + + if (curr_state == QEDE_STATE_OPEN) { + rc = qede_load(edev, QEDE_LOAD_RECOVERY, true); + if (rc) + goto err; + + qede_config_rx_mode(edev->ndev); + udp_tunnel_get_rx_info(edev->ndev); + } + + edev->state = curr_state; + + DP_NOTICE(edev, "Recovery handling is done\n"); + + return; + +err: + qede_recovery_failed(edev); +} + static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq) { struct netdev_queue *netdev_txq; diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index 1900bf7e67d1..ffabc2d2f082 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -50,6 +50,8 @@ static void _qede_rdma_dev_add(struct qede_dev *edev) if (!qedr_drv) return; + /* Leftovers from previous error recovery */ + edev->rdma_info.exp_recovery = false; edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev, edev->ndev); } @@ -87,21 +89,26 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev) destroy_workqueue(edev->rdma_info.rdma_wq); } -int qede_rdma_dev_add(struct qede_dev *edev) +int qede_rdma_dev_add(struct qede_dev *edev, bool recovery) { - int rc = 0; + int rc; - if (qede_rdma_supported(edev)) { - rc = qede_rdma_create_wq(edev); - if (rc) - return rc; + if (!qede_rdma_supported(edev)) + return 0; - INIT_LIST_HEAD(&edev->rdma_info.entry); - mutex_lock(&qedr_dev_list_lock); - list_add_tail(&edev->rdma_info.entry, &qedr_dev_list); - _qede_rdma_dev_add(edev); - mutex_unlock(&qedr_dev_list_lock); - } + /* Cannot start qedr while recovering since it wasn't fully stopped */ + if (recovery) + return 0; + + rc = qede_rdma_create_wq(edev); + if (rc) + return rc; + + INIT_LIST_HEAD(&edev->rdma_info.entry); + mutex_lock(&qedr_dev_list_lock); + list_add_tail(&edev->rdma_info.entry, &qedr_dev_list); + _qede_rdma_dev_add(edev); + mutex_unlock(&qedr_dev_list_lock); return rc; } @@ -110,19 +117,30 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev) { if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev) qedr_drv->remove(edev->rdma_info.qedr_dev); - edev->rdma_info.qedr_dev = NULL; } -void qede_rdma_dev_remove(struct qede_dev *edev) +void qede_rdma_dev_remove(struct qede_dev *edev, bool recovery) { if (!qede_rdma_supported(edev)) return; - qede_rdma_destroy_wq(edev); - mutex_lock(&qedr_dev_list_lock); - _qede_rdma_dev_remove(edev); - list_del(&edev->rdma_info.entry); - mutex_unlock(&qedr_dev_list_lock); + /* Cannot remove qedr while recovering since it wasn't fully stopped */ + if (!recovery) { + qede_rdma_destroy_wq(edev); + mutex_lock(&qedr_dev_list_lock); + if (!edev->rdma_info.exp_recovery) + _qede_rdma_dev_remove(edev); + edev->rdma_info.qedr_dev = NULL; + list_del(&edev->rdma_info.entry); + mutex_unlock(&qedr_dev_list_lock); + } else { + if (!edev->rdma_info.exp_recovery) { + mutex_lock(&qedr_dev_list_lock); + _qede_rdma_dev_remove(edev); + mutex_unlock(&qedr_dev_list_lock); + } + edev->rdma_info.exp_recovery = true; + } } static void _qede_rdma_dev_open(struct qede_dev *edev) @@ -204,7 +222,8 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv) mutex_lock(&qedr_dev_list_lock); list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) { - if (edev->rdma_info.qedr_dev) + /* If device has experienced recovery it was already removed */ + if (edev->rdma_info.qedr_dev && !edev->rdma_info.exp_recovery) _qede_rdma_dev_remove(edev); } qedr_drv = NULL; @@ -284,6 +303,10 @@ static void qede_rdma_add_event(struct qede_dev *edev, { struct qede_rdma_event_work *event_node; + /* If a recovery was experienced avoid adding the event */ + if (edev->rdma_info.exp_recovery) + return; + if (!edev->rdma_info.qedr_dev) return; diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 44f6e4873aad..4f910c4f67b0 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -691,7 +691,7 @@ static void cp_tx (struct cp_private *cp) } bytes_compl += skb->len; pkts_compl++; - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); } cp->tx_skb[tx_tail] = NULL; diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index abb94c543aa2..9dc68981751b 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -229,7 +229,6 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl); -static int use_dac = -1; static struct { u32 msg_enable; } debug = { -1 }; @@ -639,6 +638,7 @@ struct rtl8169_private { void __iomem *mmio_addr; /* memory map physical address */ struct pci_dev *pci_dev; struct net_device *dev; + struct phy_device *phydev; struct napi_struct napi; u32 msg_enable; u16 mac_version; @@ -679,12 +679,12 @@ struct rtl8169_private { } wk; unsigned supports_gmii:1; - struct mii_bus *mii_bus; dma_addr_t counters_phys_addr; struct rtl8169_counters *counters; struct rtl8169_tc_offsets tc_offset; u32 saved_wolopts; + const char *fw_name; struct rtl_fw { const struct firmware *fw; @@ -697,15 +697,12 @@ struct rtl8169_private { size_t size; } phy_action; } *rtl_fw; -#define RTL_FIRMWARE_UNKNOWN ERR_PTR(-EAGAIN) u32 ocp_base; }; MODULE_AUTHOR("Realtek and the Linux r8169 crew "); MODULE_DESCRIPTION("RealTek RTL-8169 Gigabit Ethernet driver"); -module_param(use_dac, int, 0); -MODULE_PARM_DESC(use_dac, "Enable PCI DAC. Unsafe on 32 bit PCI slot."); module_param_named(debug, debug.msg_enable, int, 0); MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 16=all)"); MODULE_SOFTDEP("pre: realtek"); @@ -745,6 +742,16 @@ static void rtl_unlock_work(struct rtl8169_private *tp) mutex_unlock(&tp->wk.mutex); } +static void rtl_lock_config_regs(struct rtl8169_private *tp) +{ + RTL_W8(tp, Cfg9346, Cfg9346_Lock); +} + +static void rtl_unlock_config_regs(struct rtl8169_private *tp) +{ + RTL_W8(tp, Cfg9346, Cfg9346_Unlock); +} + static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force) { pcie_capability_clear_and_set_word(tp->pci_dev, PCI_EXP_DEVCTL, @@ -1278,11 +1285,6 @@ static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr) RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0; } -static u16 rtl_get_events(struct rtl8169_private *tp) -{ - return RTL_R16(tp, IntrStatus); -} - static void rtl_ack_events(struct rtl8169_private *tp, u16 bits) { RTL_W16(tp, IntrStatus, bits); @@ -1313,7 +1315,7 @@ static void rtl8169_irq_mask_and_ack(struct rtl8169_private *tp) static void rtl_link_chg_patch(struct rtl8169_private *tp) { struct net_device *dev = tp->dev; - struct phy_device *phydev = dev->phydev; + struct phy_device *phydev = tp->phydev; if (!netif_running(dev)) return; @@ -1369,41 +1371,6 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp) #define WAKE_ANY (WAKE_PHY | WAKE_MAGIC | WAKE_UCAST | WAKE_BCAST | WAKE_MCAST) -static u32 __rtl8169_get_wol(struct rtl8169_private *tp) -{ - u8 options; - u32 wolopts = 0; - - options = RTL_R8(tp, Config1); - if (!(options & PMEnable)) - return 0; - - options = RTL_R8(tp, Config3); - if (options & LinkUp) - wolopts |= WAKE_PHY; - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: - if (rtl_eri_read(tp, 0xdc, ERIAR_EXGMAC) & MagicPacket_v2) - wolopts |= WAKE_MAGIC; - break; - default: - if (options & MagicPacket) - wolopts |= WAKE_MAGIC; - break; - } - - options = RTL_R8(tp, Config5); - if (options & UWF) - wolopts |= WAKE_UCAST; - if (options & BWF) - wolopts |= WAKE_BCAST; - if (options & MWF) - wolopts |= WAKE_MCAST; - - return wolopts; -} - static void rtl8169_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct rtl8169_private *tp = netdev_priv(dev); @@ -1431,7 +1398,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) }; u8 options; - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + rtl_unlock_config_regs(tp); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: @@ -1479,7 +1446,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) break; } - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + rtl_lock_config_regs(tp); device_set_wakeup_enable(tp_to_dev(tp), wolopts); } @@ -1508,11 +1475,6 @@ static int rtl8169_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) return 0; } -static const char *rtl_lookup_firmware_name(struct rtl8169_private *tp) -{ - return rtl_chip_infos[tp->mac_version].fw_name; -} - static void rtl8169_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@ -1522,7 +1484,7 @@ static void rtl8169_get_drvinfo(struct net_device *dev, strlcpy(info->driver, MODULENAME, sizeof(info->driver)); strlcpy(info->bus_info, pci_name(tp->pci_dev), sizeof(info->bus_info)); BUILD_BUG_ON(sizeof(info->fw_version) < sizeof(rtl_fw->version)); - if (!IS_ERR_OR_NULL(rtl_fw)) + if (rtl_fw) strlcpy(info->fw_version, rtl_fw->version, sizeof(info->fw_version)); } @@ -1987,6 +1949,196 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) return 0; } +static int rtl_get_eee_supp(struct rtl8169_private *tp) +{ + struct phy_device *phydev = tp->phydev; + int ret; + + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_34: + case RTL_GIGA_MAC_VER_35: + case RTL_GIGA_MAC_VER_36: + case RTL_GIGA_MAC_VER_38: + ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE); + break; + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: + phy_write(phydev, 0x1f, 0x0a5c); + ret = phy_read(phydev, 0x12); + phy_write(phydev, 0x1f, 0x0000); + break; + default: + ret = -EPROTONOSUPPORT; + break; + } + + return ret; +} + +static int rtl_get_eee_lpadv(struct rtl8169_private *tp) +{ + struct phy_device *phydev = tp->phydev; + int ret; + + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_34: + case RTL_GIGA_MAC_VER_35: + case RTL_GIGA_MAC_VER_36: + case RTL_GIGA_MAC_VER_38: + ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_LPABLE); + break; + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: + phy_write(phydev, 0x1f, 0x0a5d); + ret = phy_read(phydev, 0x11); + phy_write(phydev, 0x1f, 0x0000); + break; + default: + ret = -EPROTONOSUPPORT; + break; + } + + return ret; +} + +static int rtl_get_eee_adv(struct rtl8169_private *tp) +{ + struct phy_device *phydev = tp->phydev; + int ret; + + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_34: + case RTL_GIGA_MAC_VER_35: + case RTL_GIGA_MAC_VER_36: + case RTL_GIGA_MAC_VER_38: + ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV); + break; + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: + phy_write(phydev, 0x1f, 0x0a5d); + ret = phy_read(phydev, 0x10); + phy_write(phydev, 0x1f, 0x0000); + break; + default: + ret = -EPROTONOSUPPORT; + break; + } + + return ret; +} + +static int rtl_set_eee_adv(struct rtl8169_private *tp, int val) +{ + struct phy_device *phydev = tp->phydev; + int ret = 0; + + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_34: + case RTL_GIGA_MAC_VER_35: + case RTL_GIGA_MAC_VER_36: + case RTL_GIGA_MAC_VER_38: + ret = phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, val); + break; + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: + phy_write(phydev, 0x1f, 0x0a5d); + phy_write(phydev, 0x10, val); + phy_write(phydev, 0x1f, 0x0000); + break; + default: + ret = -EPROTONOSUPPORT; + break; + } + + return ret; +} + +static int rtl8169_get_eee(struct net_device *dev, struct ethtool_eee *data) +{ + struct rtl8169_private *tp = netdev_priv(dev); + struct device *d = tp_to_dev(tp); + int ret; + + pm_runtime_get_noresume(d); + + if (!pm_runtime_active(d)) { + ret = -EOPNOTSUPP; + goto out; + } + + /* Get Supported EEE */ + ret = rtl_get_eee_supp(tp); + if (ret < 0) + goto out; + data->supported = mmd_eee_cap_to_ethtool_sup_t(ret); + + /* Get advertisement EEE */ + ret = rtl_get_eee_adv(tp); + if (ret < 0) + goto out; + data->advertised = mmd_eee_adv_to_ethtool_adv_t(ret); + data->eee_enabled = !!data->advertised; + + /* Get LP advertisement EEE */ + ret = rtl_get_eee_lpadv(tp); + if (ret < 0) + goto out; + data->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(ret); + data->eee_active = !!(data->advertised & data->lp_advertised); +out: + pm_runtime_put_noidle(d); + return ret < 0 ? ret : 0; +} + +static int rtl8169_set_eee(struct net_device *dev, struct ethtool_eee *data) +{ + struct rtl8169_private *tp = netdev_priv(dev); + struct device *d = tp_to_dev(tp); + int old_adv, adv = 0, cap, ret; + + pm_runtime_get_noresume(d); + + if (!dev->phydev || !pm_runtime_active(d)) { + ret = -EOPNOTSUPP; + goto out; + } + + if (dev->phydev->autoneg == AUTONEG_DISABLE || + dev->phydev->duplex != DUPLEX_FULL) { + ret = -EPROTONOSUPPORT; + goto out; + } + + /* Get Supported EEE */ + ret = rtl_get_eee_supp(tp); + if (ret < 0) + goto out; + cap = ret; + + ret = rtl_get_eee_adv(tp); + if (ret < 0) + goto out; + old_adv = ret; + + if (data->eee_enabled) { + adv = !data->advertised ? cap : + ethtool_adv_to_mmd_eee_adv_t(data->advertised) & cap; + /* Mask prohibited EEE modes */ + adv &= ~dev->phydev->eee_broken_modes; + } + + if (old_adv != adv) { + ret = rtl_set_eee_adv(tp, adv); + if (ret < 0) + goto out; + + /* Restart autonegotiation so the new modes get sent to the + * link partner. + */ + ret = phy_restart_aneg(dev->phydev); + } + +out: + pm_runtime_put_noidle(d); + return ret < 0 ? ret : 0; +} + static const struct ethtool_ops rtl8169_ethtool_ops = { .get_drvinfo = rtl8169_get_drvinfo, .get_regs_len = rtl8169_get_regs_len, @@ -2003,10 +2155,20 @@ static const struct ethtool_ops rtl8169_ethtool_ops = { .get_ethtool_stats = rtl8169_get_ethtool_stats, .get_ts_info = ethtool_op_get_ts_info, .nway_reset = phy_ethtool_nway_reset, + .get_eee = rtl8169_get_eee, + .set_eee = rtl8169_set_eee, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, }; +static void rtl_enable_eee(struct rtl8169_private *tp) +{ + int supported = rtl_get_eee_supp(tp); + + if (supported > 0) + rtl_set_eee_adv(tp, supported); +} + static void rtl8169_get_mac_version(struct rtl8169_private *tp) { /* @@ -2199,7 +2361,7 @@ static bool rtl_fw_format_ok(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) if (fw->size % FW_OPCODE_SIZE) goto out; - strlcpy(version, rtl_lookup_firmware_name(tp), RTL_VER_SIZE); + strlcpy(version, tp->fw_name, RTL_VER_SIZE); pa->code = (__le32 *)fw->data; pa->size = fw->size / FW_OPCODE_SIZE; @@ -2374,20 +2536,18 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) static void rtl_release_firmware(struct rtl8169_private *tp) { - if (!IS_ERR_OR_NULL(tp->rtl_fw)) { + if (tp->rtl_fw) { release_firmware(tp->rtl_fw->fw); kfree(tp->rtl_fw); + tp->rtl_fw = NULL; } - tp->rtl_fw = RTL_FIRMWARE_UNKNOWN; } static void rtl_apply_firmware(struct rtl8169_private *tp) { - struct rtl_fw *rtl_fw = tp->rtl_fw; - /* TODO: release firmware once rtl_phy_write_fw signals failures. */ - if (!IS_ERR_OR_NULL(rtl_fw)) - rtl_phy_write_fw(tp, rtl_fw); + if (tp->rtl_fw) + rtl_phy_write_fw(tp, tp->rtl_fw); } static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val) @@ -2398,6 +2558,33 @@ static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val) rtl_apply_firmware(tp); } +static void rtl8168_config_eee_mac(struct rtl8169_private *tp) +{ + rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_1111, 0x0003, 0x0000, ERIAR_EXGMAC); +} + +static void rtl8168f_config_eee_phy(struct rtl8169_private *tp) +{ + struct phy_device *phydev = tp->phydev; + + phy_write(phydev, 0x1f, 0x0007); + phy_write(phydev, 0x1e, 0x0020); + phy_set_bits(phydev, 0x15, BIT(8)); + + phy_write(phydev, 0x1f, 0x0005); + phy_write(phydev, 0x05, 0x8b85); + phy_set_bits(phydev, 0x06, BIT(13)); + + phy_write(phydev, 0x1f, 0x0000); +} + +static void rtl8168g_config_eee_phy(struct rtl8169_private *tp) +{ + phy_write(tp->phydev, 0x1f, 0x0a43); + phy_set_bits(tp->phydev, 0x11, BIT(4)); + phy_write(tp->phydev, 0x1f, 0x0000); +} + static void rtl8169s_hw_phy_config(struct rtl8169_private *tp) { static const struct phy_reg phy_reg_init[] = { @@ -3165,22 +3352,8 @@ static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp) rtl_w0w1_phy(tp, 0x06, 0x4000, 0x0000); rtl_writephy(tp, 0x1f, 0x0000); - /* EEE setting */ - rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_1111, 0x0003, 0x0000, ERIAR_EXGMAC); - rtl_writephy(tp, 0x1f, 0x0005); - rtl_writephy(tp, 0x05, 0x8b85); - rtl_w0w1_phy(tp, 0x06, 0x2000, 0x0000); - rtl_writephy(tp, 0x1f, 0x0004); - rtl_writephy(tp, 0x1f, 0x0007); - rtl_writephy(tp, 0x1e, 0x0020); - rtl_w0w1_phy(tp, 0x15, 0x0100, 0x0000); - rtl_writephy(tp, 0x1f, 0x0002); - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, 0x0d, 0x0007); - rtl_writephy(tp, 0x0e, 0x003c); - rtl_writephy(tp, 0x0d, 0x4007); - rtl_writephy(tp, 0x0e, 0x0006); - rtl_writephy(tp, 0x0d, 0x0000); + rtl8168f_config_eee_phy(tp); + rtl_enable_eee(tp); /* Green feature */ rtl_writephy(tp, 0x1f, 0x0003); @@ -3215,6 +3388,9 @@ static void rtl8168f_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x05, 0x8b86); rtl_w0w1_phy(tp, 0x06, 0x0001, 0x0000); rtl_writephy(tp, 0x1f, 0x0000); + + rtl8168f_config_eee_phy(tp); + rtl_enable_eee(tp); } static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp) @@ -3348,22 +3524,6 @@ static void rtl8411_hw_phy_config(struct rtl8169_private *tp) rtl_w0w1_phy(tp, 0x06, 0x8000, 0x0000); rtl_writephy(tp, 0x1f, 0x0000); - /* eee setting */ - rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0001, 0x00, 0x03, ERIAR_EXGMAC); - rtl_writephy(tp, 0x1f, 0x0005); - rtl_writephy(tp, 0x05, 0x8b85); - rtl_w0w1_phy(tp, 0x06, 0x0000, 0x2000); - rtl_writephy(tp, 0x1f, 0x0004); - rtl_writephy(tp, 0x1f, 0x0007); - rtl_writephy(tp, 0x1e, 0x0020); - rtl_w0w1_phy(tp, 0x15, 0x0000, 0x0100); - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, 0x0d, 0x0007); - rtl_writephy(tp, 0x0e, 0x003c); - rtl_writephy(tp, 0x0d, 0x4007); - rtl_writephy(tp, 0x0e, 0x0000); - rtl_writephy(tp, 0x0d, 0x0000); - /* Green feature */ rtl_writephy(tp, 0x1f, 0x0003); rtl_w0w1_phy(tp, 0x19, 0x0000, 0x0001); @@ -3371,6 +3531,30 @@ static void rtl8411_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); } +static void rtl8168g_disable_aldps(struct rtl8169_private *tp) +{ + phy_write(tp->phydev, 0x1f, 0x0a43); + phy_clear_bits(tp->phydev, 0x10, BIT(2)); +} + +static void rtl8168g_phy_adjust_10m_aldps(struct rtl8169_private *tp) +{ + struct phy_device *phydev = tp->phydev; + + phy_write(phydev, 0x1f, 0x0bcc); + phy_clear_bits(phydev, 0x14, BIT(8)); + + phy_write(phydev, 0x1f, 0x0a44); + phy_set_bits(phydev, 0x11, BIT(7) | BIT(6)); + + phy_write(phydev, 0x1f, 0x0a43); + phy_write(phydev, 0x13, 0x8084); + phy_clear_bits(phydev, 0x14, BIT(14) | BIT(13)); + phy_set_bits(phydev, 0x10, BIT(12) | BIT(1) | BIT(0)); + + phy_write(phydev, 0x1f, 0x0000); +} + static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) { rtl_apply_firmware(tp); @@ -3397,14 +3581,7 @@ static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0a44); rtl_w0w1_phy(tp, 0x11, 0x000c, 0x0000); - rtl_writephy(tp, 0x1f, 0x0bcc); - rtl_w0w1_phy(tp, 0x14, 0x0100, 0x0000); - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x00c0, 0x0000); - rtl_writephy(tp, 0x1f, 0x0a43); - rtl_writephy(tp, 0x13, 0x8084); - rtl_w0w1_phy(tp, 0x14, 0x0000, 0x6000); - rtl_w0w1_phy(tp, 0x10, 0x1003, 0x0000); + rtl8168g_phy_adjust_10m_aldps(tp); /* EEE auto-fallback function */ rtl_writephy(tp, 0x1f, 0x0a4b); @@ -3429,17 +3606,16 @@ static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x14, 0x9065); rtl_writephy(tp, 0x14, 0x1065); - /* Check ALDPS bit, disable it if enabled */ - rtl_writephy(tp, 0x1f, 0x0a43); - if (rtl_readphy(tp, 0x10) & 0x0004) - rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004); - - rtl_writephy(tp, 0x1f, 0x0000); + rtl8168g_disable_aldps(tp); + rtl8168g_config_eee_phy(tp); + rtl_enable_eee(tp); } static void rtl8168g_2_hw_phy_config(struct rtl8169_private *tp) { rtl_apply_firmware(tp); + rtl8168g_config_eee_phy(tp); + rtl_enable_eee(tp); } static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp) @@ -3544,12 +3720,9 @@ static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp) rtl_w0w1_phy(tp, 0x11, 0x0000, 0x0080); rtl_writephy(tp, 0x1f, 0x0000); - /* Check ALDPS bit, disable it if enabled */ - rtl_writephy(tp, 0x1f, 0x0a43); - if (rtl_readphy(tp, 0x10) & 0x0004) - rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004); - - rtl_writephy(tp, 0x1f, 0x0000); + rtl8168g_disable_aldps(tp); + rtl8168g_config_eee_phy(tp); + rtl_enable_eee(tp); } static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp) @@ -3617,12 +3790,9 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp) rtl_w0w1_phy(tp, 0x11, 0x0000, 0x0080); rtl_writephy(tp, 0x1f, 0x0000); - /* Check ALDPS bit, disable it if enabled */ - rtl_writephy(tp, 0x1f, 0x0a43); - if (rtl_readphy(tp, 0x10) & 0x0004) - rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004); - - rtl_writephy(tp, 0x1f, 0x0000); + rtl8168g_disable_aldps(tp); + rtl8168g_config_eee_phy(tp); + rtl_enable_eee(tp); } static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp) @@ -3632,16 +3802,7 @@ static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp) rtl_w0w1_phy(tp, 0x11, 0x000c, 0x0000); rtl_writephy(tp, 0x1f, 0x0000); - /* patch 10M & ALDPS */ - rtl_writephy(tp, 0x1f, 0x0bcc); - rtl_w0w1_phy(tp, 0x14, 0x0000, 0x0100); - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x00c0, 0x0000); - rtl_writephy(tp, 0x1f, 0x0a43); - rtl_writephy(tp, 0x13, 0x8084); - rtl_w0w1_phy(tp, 0x14, 0x0000, 0x6000); - rtl_w0w1_phy(tp, 0x10, 0x1003, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); + rtl8168g_phy_adjust_10m_aldps(tp); /* Enable EEE auto-fallback function */ rtl_writephy(tp, 0x1f, 0x0a4b); @@ -3659,26 +3820,14 @@ static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp) rtl_w0w1_phy(tp, 0x11, 0x4000, 0x2000); rtl_writephy(tp, 0x1f, 0x0000); - /* Check ALDPS bit, disable it if enabled */ - rtl_writephy(tp, 0x1f, 0x0a43); - if (rtl_readphy(tp, 0x10) & 0x0004) - rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004); - - rtl_writephy(tp, 0x1f, 0x0000); + rtl8168g_disable_aldps(tp); + rtl8168g_config_eee_phy(tp); + rtl_enable_eee(tp); } static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp) { - /* patch 10M & ALDPS */ - rtl_writephy(tp, 0x1f, 0x0bcc); - rtl_w0w1_phy(tp, 0x14, 0x0000, 0x0100); - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x00c0, 0x0000); - rtl_writephy(tp, 0x1f, 0x0a43); - rtl_writephy(tp, 0x13, 0x8084); - rtl_w0w1_phy(tp, 0x14, 0x0000, 0x6000); - rtl_w0w1_phy(tp, 0x10, 0x1003, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); + rtl8168g_phy_adjust_10m_aldps(tp); /* Enable UC LPF tune function */ rtl_writephy(tp, 0x1f, 0x0a43); @@ -3750,12 +3899,9 @@ static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x14, 0x1065); rtl_writephy(tp, 0x1f, 0x0000); - /* Check ALDPS bit, disable it if enabled */ - rtl_writephy(tp, 0x1f, 0x0a43); - if (rtl_readphy(tp, 0x10) & 0x0004) - rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004); - - rtl_writephy(tp, 0x1f, 0x0000); + rtl8168g_disable_aldps(tp); + rtl8168g_config_eee_phy(tp); + rtl_enable_eee(tp); } static void rtl8102e_hw_phy_config(struct rtl8169_private *tp) @@ -3994,24 +4140,24 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) } /* We may have called phy_speed_down before */ - phy_speed_up(dev->phydev); + phy_speed_up(tp->phydev); - genphy_soft_reset(dev->phydev); + genphy_soft_reset(tp->phydev); /* It was reported that several chips end up with 10MBit/Half on a * 1GBit link after resuming from S3. For whatever reason the PHY on * these chips doesn't properly start a renegotiation when soft-reset. * Explicitly requesting a renegotiation fixes this. */ - if (dev->phydev->autoneg == AUTONEG_ENABLE) - phy_restart_aneg(dev->phydev); + if (tp->phydev->autoneg == AUTONEG_ENABLE) + phy_restart_aneg(tp->phydev); } static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr) { rtl_lock_work(tp); - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + rtl_unlock_config_regs(tp); RTL_W32(tp, MAC4, addr[4] | addr[5] << 8); RTL_R32(tp, MAC4); @@ -4022,7 +4168,7 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr) if (tp->mac_version == RTL_GIGA_MAC_VER_34) rtl_rar_exgmac_set(tp, addr); - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + rtl_lock_config_regs(tp); rtl_unlock_work(tp); } @@ -4049,10 +4195,12 @@ static int rtl_set_mac_address(struct net_device *dev, void *p) static int rtl8169_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { + struct rtl8169_private *tp = netdev_priv(dev); + if (!netif_running(dev)) return -ENODEV; - return phy_mii_ioctl(dev->phydev, ifr, cmd); + return phy_mii_ioctl(tp->phydev, ifr, cmd); } static void rtl_init_mdio_ops(struct rtl8169_private *tp) @@ -4101,15 +4249,10 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) static bool rtl_wol_pll_power_down(struct rtl8169_private *tp) { - struct phy_device *phydev; - - if (!__rtl8169_get_wol(tp)) + if (!device_may_wakeup(tp_to_dev(tp))) return false; - /* phydev may not be attached to netdevice */ - phydev = mdiobus_get_phy(tp->mii_bus, 0); - - phy_speed_down(phydev, false); + phy_speed_down(tp->phydev, false); rtl_wol_suspend_quirk(tp); return true; @@ -4178,7 +4321,7 @@ static void r8168_pll_power_up(struct rtl8169_private *tp) break; } - phy_resume(tp->dev->phydev); + phy_resume(tp->phydev); /* give MAC/PHY some time to resume */ msleep(20); } @@ -4234,18 +4377,18 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp) static void rtl_hw_jumbo_enable(struct rtl8169_private *tp) { if (tp->jumbo_ops.enable) { - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + rtl_unlock_config_regs(tp); tp->jumbo_ops.enable(tp); - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + rtl_lock_config_regs(tp); } } static void rtl_hw_jumbo_disable(struct rtl8169_private *tp) { if (tp->jumbo_ops.disable) { - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + rtl_unlock_config_regs(tp); tp->jumbo_ops.disable(tp); - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + rtl_lock_config_regs(tp); } } @@ -4378,21 +4521,20 @@ static void rtl_hw_reset(struct rtl8169_private *tp) rtl_udelay_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100); } -static void rtl_request_uncached_firmware(struct rtl8169_private *tp) +static void rtl_request_firmware(struct rtl8169_private *tp) { struct rtl_fw *rtl_fw; - const char *name; int rc = -ENOMEM; - name = rtl_lookup_firmware_name(tp); - if (!name) - goto out_no_firmware; + /* firmware loaded already or no firmware available */ + if (tp->rtl_fw || !tp->fw_name) + return; rtl_fw = kzalloc(sizeof(*rtl_fw), GFP_KERNEL); if (!rtl_fw) goto err_warn; - rc = request_firmware(&rtl_fw->fw, name, tp_to_dev(tp)); + rc = request_firmware(&rtl_fw->fw, tp->fw_name, tp_to_dev(tp)); if (rc < 0) goto err_free; @@ -4401,7 +4543,7 @@ static void rtl_request_uncached_firmware(struct rtl8169_private *tp) goto err_release_firmware; tp->rtl_fw = rtl_fw; -out: + return; err_release_firmware: @@ -4410,16 +4552,7 @@ err_free: kfree(rtl_fw); err_warn: netif_warn(tp, ifup, tp->dev, "unable to load firmware patch %s (%d)\n", - name, rc); -out_no_firmware: - tp->rtl_fw = NULL; - goto out; -} - -static void rtl_request_firmware(struct rtl8169_private *tp) -{ - if (IS_ERR(tp->rtl_fw)) - rtl_request_uncached_firmware(tp); + tp->fw_name, rc); } static void rtl_rx_close(struct rtl8169_private *tp) @@ -4566,13 +4699,13 @@ static void rtl_set_rx_mode(struct net_device *dev) static void rtl_hw_start(struct rtl8169_private *tp) { - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + rtl_unlock_config_regs(tp); tp->hw_start(tp); rtl_set_rx_max_size(tp); rtl_set_rx_tx_desc_registers(tp); - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + rtl_lock_config_regs(tp); /* Initially a 10 us delay. Turned it into a PCI commit. - FR */ RTL_R8(tp, IntrMask); @@ -4696,18 +4829,10 @@ static void rtl_enable_clock_request(struct rtl8169_private *tp) PCI_EXP_LNKCTL_CLKREQ_EN); } -static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable) +static void rtl_pcie_state_l2l3_disable(struct rtl8169_private *tp) { - u8 data; - - data = RTL_R8(tp, Config3); - - if (enable) - data |= Rdy_to_L23; - else - data &= ~Rdy_to_L23; - - RTL_W8(tp, Config3, data); + /* work around an issue when PCI reset occurs during L2/L3 state */ + RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Rdy_to_L23); } static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) @@ -4965,6 +5090,8 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) /* Adjust EEE LED frequency */ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07); + rtl8168_config_eee_mac(tp); + RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN); RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN); RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en); @@ -4997,6 +5124,8 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp) RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN); RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN); RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en); + + rtl8168_config_eee_mac(tp); } static void rtl_hw_start_8168f_1(struct rtl8169_private *tp) @@ -5028,7 +5157,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp) }; rtl_hw_start_8168f(tp); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); rtl_ephy_init(tp, e_info_8168f_1, ARRAY_SIZE(e_info_8168f_1)); @@ -5059,10 +5188,12 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp) /* Adjust EEE LED frequency */ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07); + rtl8168_config_eee_mac(tp); + rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC); rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); } static void rtl_hw_start_8168g_1(struct rtl8169_private *tp) @@ -5161,6 +5292,8 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) /* Adjust EEE LED frequency */ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07); + rtl8168_config_eee_mac(tp); + RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN); RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN); @@ -5168,7 +5301,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) rtl_w0w1_eri(tp, 0x1b0, ERIAR_MASK_0011, 0x0000, 0x1000, ERIAR_EXGMAC); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); rtl_writephy(tp, 0x1f, 0x0c42); rg_saw_cnt = (rtl_readphy(tp, 0x13) & 0x3fff); @@ -5241,11 +5374,13 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp) /* Adjust EEE LED frequency */ RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07); + rtl8168_config_eee_mac(tp); + rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06, ERIAR_EXGMAC); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); } static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp) @@ -5516,7 +5651,7 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1)); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); } static void rtl_hw_start_8105e_2(struct rtl8169_private *tp) @@ -5551,7 +5686,7 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp) rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0e00, 0xff00, ERIAR_EXGMAC); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); } static void rtl_hw_start_8106(struct rtl8169_private *tp) @@ -5565,7 +5700,7 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp) RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN); - rtl_pcie_state_l2l3_enable(tp, false); + rtl_pcie_state_l2l3_disable(tp); rtl_hw_aspm_clkreq_enable(tp, true); } @@ -5665,11 +5800,6 @@ static inline void rtl8169_mark_to_asic(struct RxDesc *desc) desc->opts1 = cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE); } -static inline void *rtl8169_align(void *data) -{ - return (void *)ALIGN((long)data, 16); -} - static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp, struct RxDesc *desc) { @@ -5682,15 +5812,13 @@ static struct sk_buff *rtl8169_alloc_rx_data(struct rtl8169_private *tp, if (!data) return NULL; - if (rtl8169_align(data) != data) { - kfree(data); - data = kmalloc_node(R8169_RX_BUF_SIZE + 15, GFP_KERNEL, node); - if (!data) - return NULL; + /* Memory should be properly aligned, but better check. */ + if (!IS_ALIGNED((unsigned long)data, 8)) { + netdev_err_once(tp->dev, "RX buffer not 8-byte-aligned\n"); + goto err_out; } - mapping = dma_map_single(d, rtl8169_align(data), R8169_RX_BUF_SIZE, - DMA_FROM_DEVICE); + mapping = dma_map_single(d, data, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(d, mapping))) { if (net_ratelimit()) netif_err(tp, drv, tp->dev, "Failed to map RX DMA!\n"); @@ -6193,16 +6321,6 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev) PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_REC_MASTER_ABORT | PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_SIG_TARGET_ABORT)); - /* The infamous DAC f*ckup only happens at boot time */ - if ((tp->cp_cmd & PCIDAC) && !tp->cur_rx) { - netif_info(tp, intr, dev, "disabling PCI DAC\n"); - tp->cp_cmd &= ~PCIDAC; - RTL_W16(tp, CPlusCmd, tp->cp_cmd); - dev->features &= ~NETIF_F_HIGHDMA; - } - - rtl8169_hw_reset(tp); - rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); } @@ -6298,7 +6416,6 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data, struct sk_buff *skb; struct device *d = tp_to_dev(tp); - data = rtl8169_align(data); dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE); prefetch(data); skb = napi_alloc_skb(&tp->napi, pkt_size); @@ -6409,7 +6526,7 @@ release_descriptor: static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) { struct rtl8169_private *tp = dev_instance; - u16 status = rtl_get_events(tp); + u16 status = RTL_R16(tp, IntrStatus); u16 irq_mask = RTL_R16(tp, IntrMask); if (status == 0xffff || !(status & irq_mask)) @@ -6420,8 +6537,8 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) goto out; } - if (status & LinkChg && tp->dev->phydev) - phy_mac_interrupt(tp->dev->phydev); + if (status & LinkChg) + phy_mac_interrupt(tp->phydev); if (unlikely(status & RxFIFOOver && tp->mac_version == RTL_GIGA_MAC_VER_11)) { @@ -6512,12 +6629,12 @@ static void r8169_phylink_handler(struct net_device *ndev) } if (net_ratelimit()) - phy_print_status(ndev->phydev); + phy_print_status(tp->phydev); } static int r8169_phy_connect(struct rtl8169_private *tp) { - struct phy_device *phydev = mdiobus_get_phy(tp->mii_bus, 0); + struct phy_device *phydev = tp->phydev; phy_interface_t phy_mode; int ret; @@ -6544,7 +6661,7 @@ static void rtl8169_down(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); - phy_stop(dev->phydev); + phy_stop(tp->phydev); napi_disable(&tp->napi); netif_stop_queue(dev); @@ -6586,7 +6703,7 @@ static int rtl8169_close(struct net_device *dev) cancel_work_sync(&tp->wk.work); - phy_disconnect(dev->phydev); + phy_disconnect(tp->phydev); pci_free_irq(pdev, 0, tp); @@ -6637,10 +6754,6 @@ static int rtl_open(struct net_device *dev) if (retval < 0) goto err_free_rx_1; - INIT_WORK(&tp->wk.work, rtl_task); - - smp_mb(); - rtl_request_firmware(tp); retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, tp, @@ -6667,7 +6780,7 @@ static int rtl_open(struct net_device *dev) if (!rtl8169_init_counter_offsets(tp)) netif_warn(tp, hw, dev, "counter reset/update failed\n"); - phy_start(dev->phydev); + phy_start(tp->phydev); netif_start_queue(dev); rtl_unlock_work(tp); @@ -6756,7 +6869,7 @@ static void rtl8169_net_suspend(struct net_device *dev) if (!netif_running(dev)) return; - phy_stop(dev->phydev); + phy_stop(tp->phydev); netif_device_detach(dev); rtl_lock_work(tp); @@ -6791,14 +6904,13 @@ static void __rtl8169_resume(struct net_device *dev) rtl_pll_power_up(tp); rtl8169_init_phy(dev, tp); - phy_start(tp->dev->phydev); + phy_start(tp->phydev); rtl_lock_work(tp); napi_enable(&tp->napi); set_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); + rtl_reset_work(tp); rtl_unlock_work(tp); - - rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); } static int rtl8169_resume(struct device *device) @@ -6935,7 +7047,7 @@ static void rtl_remove_one(struct pci_dev *pdev) netif_napi_del(&tp->napi); unregister_netdev(dev); - mdiobus_unregister(tp->mii_bus); + mdiobus_unregister(tp->phydev->mdio.bus); rtl_release_firmware(tp); @@ -6995,9 +7107,9 @@ static int rtl_alloc_irq(struct rtl8169_private *tp) unsigned int flags; if (tp->mac_version <= RTL_GIGA_MAC_VER_06) { - RTL_W8(tp, Cfg9346, Cfg9346_Unlock); + rtl_unlock_config_regs(tp); RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable); - RTL_W8(tp, Cfg9346, Cfg9346_Lock); + rtl_lock_config_regs(tp); flags = PCI_IRQ_LEGACY; } else { flags = PCI_IRQ_ALL_TYPES; @@ -7042,7 +7154,6 @@ static int r8169_mdio_write_reg(struct mii_bus *mii_bus, int phyaddr, static int r8169_mdio_register(struct rtl8169_private *tp) { struct pci_dev *pdev = tp->pci_dev; - struct phy_device *phydev; struct mii_bus *new_bus; int ret; @@ -7064,16 +7175,14 @@ static int r8169_mdio_register(struct rtl8169_private *tp) if (ret) return ret; - phydev = mdiobus_get_phy(new_bus, 0); - if (!phydev) { + tp->phydev = mdiobus_get_phy(new_bus, 0); + if (!tp->phydev) { mdiobus_unregister(new_bus); return -ENODEV; } /* PHY will be woken up in rtl_open() */ - phy_suspend(phydev); - - tp->mii_bus = new_bus; + phy_suspend(tp->phydev); return 0; } @@ -7171,6 +7280,32 @@ static void rtl_disable_clk(void *data) clk_disable_unprepare(data); } +static int rtl_get_ether_clk(struct rtl8169_private *tp) +{ + struct device *d = tp_to_dev(tp); + struct clk *clk; + int rc; + + clk = devm_clk_get(d, "ether_clk"); + if (IS_ERR(clk)) { + rc = PTR_ERR(clk); + if (rc == -ENOENT) + /* clk-core allows NULL (for suspend / resume) */ + rc = 0; + else if (rc != -EPROBE_DEFER) + dev_err(d, "failed to get clk: %d\n", rc); + } else { + tp->clk = clk; + rc = clk_prepare_enable(clk); + if (rc) + dev_err(d, "failed to enable clk: %d\n", rc); + else + rc = devm_add_action_or_reset(d, rtl_disable_clk, clk); + } + + return rc; +} + static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data; @@ -7192,30 +7327,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->supports_gmii = cfg->has_gmii; /* Get the *optional* external "ether_clk" used on some boards */ - tp->clk = devm_clk_get(&pdev->dev, "ether_clk"); - if (IS_ERR(tp->clk)) { - rc = PTR_ERR(tp->clk); - if (rc == -ENOENT) { - /* clk-core allows NULL (for suspend / resume) */ - tp->clk = NULL; - } else if (rc == -EPROBE_DEFER) { - return rc; - } else { - dev_err(&pdev->dev, "failed to get clk: %d\n", rc); - return rc; - } - } else { - rc = clk_prepare_enable(tp->clk); - if (rc) { - dev_err(&pdev->dev, "failed to enable clk: %d\n", rc); - return rc; - } - - rc = devm_add_action_or_reset(&pdev->dev, rtl_disable_clk, - tp->clk); - if (rc) - return rc; - } + rc = rtl_get_ether_clk(tp); + if (rc) + return rc; /* enable device (incl. PCI PM wakeup and hotplug setup) */ rc = pcim_enable_device(pdev); @@ -7260,13 +7374,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->cp_cmd = RTL_R16(tp, CPlusCmd); - if (sizeof(dma_addr_t) > 4 && (use_dac == 1 || (use_dac == -1 && - tp->mac_version >= RTL_GIGA_MAC_VER_18)) && + if (sizeof(dma_addr_t) > 4 && tp->mac_version >= RTL_GIGA_MAC_VER_18 && !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { - - /* CPlusCmd Dual Access Cycle is only needed for non-PCIe */ - if (!pci_is_pcie(pdev)) - tp->cp_cmd |= PCIDAC; dev->features |= NETIF_F_HIGHDMA; } else { rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); @@ -7297,9 +7406,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return rc; } - tp->saved_wolopts = __rtl8169_get_wol(tp); - mutex_init(&tp->wk.mutex); + INIT_WORK(&tp->wk.work, rtl_task); u64_stats_init(&tp->rx_stats.syncp); u64_stats_init(&tp->tx_stats.syncp); @@ -7365,7 +7473,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->irq_mask = RTL_EVENT_NAPI | cfg->irq_mask; tp->coalesce_info = cfg->coalesce_info; - tp->rtl_fw = RTL_FIRMWARE_UNKNOWN; + tp->fw_name = rtl_chip_infos[chipset].fw_name; tp->counters = dmam_alloc_coherent (&pdev->dev, sizeof(*tp->counters), &tp->counters_phys_addr, @@ -7406,7 +7514,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_mdio_unregister: - mdiobus_unregister(tp->mii_bus); + mdiobus_unregister(tp->phydev->mdio.bus); return rc; } diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index ffc1ada4e6da..d28c8f9ca55b 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -343,7 +343,7 @@ static int ravb_ring_init(struct net_device *ndev, int q) int i; priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) + - ETH_HLEN + VLAN_HLEN; + ETH_HLEN + VLAN_HLEN + sizeof(__sum16); /* Allocate RX and TX skb rings */ priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q], @@ -524,13 +524,15 @@ static void ravb_rx_csum(struct sk_buff *skb) { u8 *hw_csum; - /* The hardware checksum is 2 bytes appended to packet data */ - if (unlikely(skb->len < 2)) + /* The hardware checksum is contained in sizeof(__sum16) (2) bytes + * appended to packet data + */ + if (unlikely(skb->len < sizeof(__sum16))) return; - hw_csum = skb_tail_pointer(skb) - 2; + hw_csum = skb_tail_pointer(skb) - sizeof(__sum16); skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum)); skb->ip_summed = CHECKSUM_COMPLETE; - skb_trim(skb, skb->len - 2); + skb_trim(skb, skb->len - sizeof(__sum16)); } /* Packet receive function for Ethernet AVB */ diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index ee42d4a887d7..6062e99fa69b 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -6047,22 +6047,25 @@ static const struct efx_ef10_nvram_type_info efx_ef10_nvram_types[] = { { NVRAM_PARTITION_TYPE_ROMCONFIG_DEFAULTS, 0, 0, "sfc_exp_rom_cfg_dflt" }, { NVRAM_PARTITION_TYPE_STATUS, 0, 0, "sfc_status" }, }; +#define EF10_NVRAM_PARTITION_COUNT ARRAY_SIZE(efx_ef10_nvram_types) static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, struct efx_mcdi_mtd_partition *part, - unsigned int type) + unsigned int type, + unsigned long *found) { MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_METADATA_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_METADATA_OUT_LENMAX); const struct efx_ef10_nvram_type_info *info; size_t size, erase_size, outlen; + int type_idx = 0; bool protected; int rc; - for (info = efx_ef10_nvram_types; ; info++) { - if (info == - efx_ef10_nvram_types + ARRAY_SIZE(efx_ef10_nvram_types)) + for (type_idx = 0; ; type_idx++) { + if (type_idx == EF10_NVRAM_PARTITION_COUNT) return -ENODEV; + info = efx_ef10_nvram_types + type_idx; if ((type & ~info->type_mask) == info->type) break; } @@ -6082,6 +6085,13 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, /* Protected partitions are read only. */ erase_size = 0; + /* If we've already exposed a partition of this type, hide this + * duplicate. All operations on MTDs are keyed by the type anyway, + * so we can't act on the duplicate. + */ + if (__test_and_set_bit(type_idx, found)) + return -EEXIST; + part->nvram_type = type; MCDI_SET_DWORD(inbuf, NVRAM_METADATA_IN_TYPE, type); @@ -6113,6 +6123,7 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, static int efx_ef10_mtd_probe(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX); + DECLARE_BITMAP(found, EF10_NVRAM_PARTITION_COUNT); struct efx_mcdi_mtd_partition *parts; size_t outlen, n_parts_total, i, n_parts; unsigned int type; @@ -6141,11 +6152,13 @@ static int efx_ef10_mtd_probe(struct efx_nic *efx) for (i = 0; i < n_parts_total; i++) { type = MCDI_ARRAY_DWORD(outbuf, NVRAM_PARTITIONS_OUT_TYPE_ID, i); - rc = efx_ef10_mtd_probe_partition(efx, &parts[n_parts], type); - if (rc == 0) - n_parts++; - else if (rc != -ENODEV) + rc = efx_ef10_mtd_probe_partition(efx, &parts[n_parts], type, + found); + if (rc == -EEXIST || rc == -ENODEV) + continue; + if (rc) goto fail; + n_parts++; } rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts)); diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 6209cc1fb305..f194235153f9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -105,6 +105,16 @@ config DWMAC_OXNAS This selects the Oxford Semiconductor OXNASSoC glue layer support for the stmmac device driver. This driver is used for OX820. +config DWMAC_QCOM_ETHQOS + tristate "Qualcomm ETHQOS support" + default ARCH_QCOM + depends on OF && (ARCH_QCOM || COMPILE_TEST) + help + Support for the Qualcomm ETHQOS core. + + This selects the Qualcomm ETHQOS glue layer support for the + stmmac device driver. + config DWMAC_ROCKCHIP tristate "Rockchip dwmac support" default ARCH_ROCKCHIP diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index bf09701d2623..c529c21e9bdd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_DWMAC_LPC18XX) += dwmac-lpc18xx.o obj-$(CONFIG_DWMAC_MEDIATEK) += dwmac-mediatek.o obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o dwmac-meson8b.o obj-$(CONFIG_DWMAC_OXNAS) += dwmac-oxnas.o +obj-$(CONFIG_DWMAC_QCOM_ETHQOS) += dwmac-qcom-ethqos.o obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c new file mode 100644 index 000000000000..7ec895407d23 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -0,0 +1,545 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018-19, Linaro Limited + +#include +#include +#include +#include +#include +#include "stmmac.h" +#include "stmmac_platform.h" + +#define RGMII_IO_MACRO_CONFIG 0x0 +#define SDCC_HC_REG_DLL_CONFIG 0x4 +#define SDCC_HC_REG_DDR_CONFIG 0xC +#define SDCC_HC_REG_DLL_CONFIG2 0x10 +#define SDC4_STATUS 0x14 +#define SDCC_USR_CTL 0x18 +#define RGMII_IO_MACRO_CONFIG2 0x1C +#define RGMII_IO_MACRO_DEBUG1 0x20 +#define EMAC_SYSTEM_LOW_POWER_DEBUG 0x28 + +/* RGMII_IO_MACRO_CONFIG fields */ +#define RGMII_CONFIG_FUNC_CLK_EN BIT(30) +#define RGMII_CONFIG_POS_NEG_DATA_SEL BIT(23) +#define RGMII_CONFIG_GPIO_CFG_RX_INT GENMASK(21, 20) +#define RGMII_CONFIG_GPIO_CFG_TX_INT GENMASK(19, 17) +#define RGMII_CONFIG_MAX_SPD_PRG_9 GENMASK(16, 8) +#define RGMII_CONFIG_MAX_SPD_PRG_2 GENMASK(7, 6) +#define RGMII_CONFIG_INTF_SEL GENMASK(5, 4) +#define RGMII_CONFIG_BYPASS_TX_ID_EN BIT(3) +#define RGMII_CONFIG_LOOPBACK_EN BIT(2) +#define RGMII_CONFIG_PROG_SWAP BIT(1) +#define RGMII_CONFIG_DDR_MODE BIT(0) + +/* SDCC_HC_REG_DLL_CONFIG fields */ +#define SDCC_DLL_CONFIG_DLL_RST BIT(30) +#define SDCC_DLL_CONFIG_PDN BIT(29) +#define SDCC_DLL_CONFIG_MCLK_FREQ GENMASK(26, 24) +#define SDCC_DLL_CONFIG_CDR_SELEXT GENMASK(23, 20) +#define SDCC_DLL_CONFIG_CDR_EXT_EN BIT(19) +#define SDCC_DLL_CONFIG_CK_OUT_EN BIT(18) +#define SDCC_DLL_CONFIG_CDR_EN BIT(17) +#define SDCC_DLL_CONFIG_DLL_EN BIT(16) +#define SDCC_DLL_MCLK_GATING_EN BIT(5) +#define SDCC_DLL_CDR_FINE_PHASE GENMASK(3, 2) + +/* SDCC_HC_REG_DDR_CONFIG fields */ +#define SDCC_DDR_CONFIG_PRG_DLY_EN BIT(31) +#define SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY GENMASK(26, 21) +#define SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE GENMASK(29, 27) +#define SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN BIT(30) +#define SDCC_DDR_CONFIG_PRG_RCLK_DLY GENMASK(8, 0) + +/* SDCC_HC_REG_DLL_CONFIG2 fields */ +#define SDCC_DLL_CONFIG2_DLL_CLOCK_DIS BIT(21) +#define SDCC_DLL_CONFIG2_MCLK_FREQ_CALC GENMASK(17, 10) +#define SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SEL GENMASK(3, 2) +#define SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SW BIT(1) +#define SDCC_DLL_CONFIG2_DDR_CAL_EN BIT(0) + +/* SDC4_STATUS bits */ +#define SDC4_STATUS_DLL_LOCK BIT(7) + +/* RGMII_IO_MACRO_CONFIG2 fields */ +#define RGMII_CONFIG2_RSVD_CONFIG15 GENMASK(31, 17) +#define RGMII_CONFIG2_RGMII_CLK_SEL_CFG BIT(16) +#define RGMII_CONFIG2_TX_TO_RX_LOOPBACK_EN BIT(13) +#define RGMII_CONFIG2_CLK_DIVIDE_SEL BIT(12) +#define RGMII_CONFIG2_RX_PROG_SWAP BIT(7) +#define RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL BIT(6) +#define RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN BIT(5) + +struct ethqos_emac_por { + unsigned int offset; + unsigned int value; +}; + +struct qcom_ethqos { + struct platform_device *pdev; + void __iomem *rgmii_base; + + unsigned int rgmii_clk_rate; + struct clk *rgmii_clk; + unsigned int speed; + + const struct ethqos_emac_por *por; + unsigned int num_por; +}; + +static int rgmii_readl(struct qcom_ethqos *ethqos, unsigned int offset) +{ + return readl(ethqos->rgmii_base + offset); +} + +static void rgmii_writel(struct qcom_ethqos *ethqos, + int value, unsigned int offset) +{ + writel(value, ethqos->rgmii_base + offset); +} + +static void rgmii_updatel(struct qcom_ethqos *ethqos, + int mask, int val, unsigned int offset) +{ + unsigned int temp; + + temp = rgmii_readl(ethqos, offset); + temp = (temp & ~(mask)) | val; + rgmii_writel(ethqos, temp, offset); +} + +static void rgmii_dump(struct qcom_ethqos *ethqos) +{ + dev_dbg(ðqos->pdev->dev, "Rgmii register dump\n"); + dev_dbg(ðqos->pdev->dev, "RGMII_IO_MACRO_CONFIG: %x\n", + rgmii_readl(ethqos, RGMII_IO_MACRO_CONFIG)); + dev_dbg(ðqos->pdev->dev, "SDCC_HC_REG_DLL_CONFIG: %x\n", + rgmii_readl(ethqos, SDCC_HC_REG_DLL_CONFIG)); + dev_dbg(ðqos->pdev->dev, "SDCC_HC_REG_DDR_CONFIG: %x\n", + rgmii_readl(ethqos, SDCC_HC_REG_DDR_CONFIG)); + dev_dbg(ðqos->pdev->dev, "SDCC_HC_REG_DLL_CONFIG2: %x\n", + rgmii_readl(ethqos, SDCC_HC_REG_DLL_CONFIG2)); + dev_dbg(ðqos->pdev->dev, "SDC4_STATUS: %x\n", + rgmii_readl(ethqos, SDC4_STATUS)); + dev_dbg(ðqos->pdev->dev, "SDCC_USR_CTL: %x\n", + rgmii_readl(ethqos, SDCC_USR_CTL)); + dev_dbg(ðqos->pdev->dev, "RGMII_IO_MACRO_CONFIG2: %x\n", + rgmii_readl(ethqos, RGMII_IO_MACRO_CONFIG2)); + dev_dbg(ðqos->pdev->dev, "RGMII_IO_MACRO_DEBUG1: %x\n", + rgmii_readl(ethqos, RGMII_IO_MACRO_DEBUG1)); + dev_dbg(ðqos->pdev->dev, "EMAC_SYSTEM_LOW_POWER_DEBUG: %x\n", + rgmii_readl(ethqos, EMAC_SYSTEM_LOW_POWER_DEBUG)); +} + +/* Clock rates */ +#define RGMII_1000_NOM_CLK_FREQ (250 * 1000 * 1000UL) +#define RGMII_ID_MODE_100_LOW_SVS_CLK_FREQ (50 * 1000 * 1000UL) +#define RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ (5 * 1000 * 1000UL) + +static void +ethqos_update_rgmii_clk(struct qcom_ethqos *ethqos, unsigned int speed) +{ + switch (speed) { + case SPEED_1000: + ethqos->rgmii_clk_rate = RGMII_1000_NOM_CLK_FREQ; + break; + + case SPEED_100: + ethqos->rgmii_clk_rate = RGMII_ID_MODE_100_LOW_SVS_CLK_FREQ; + break; + + case SPEED_10: + ethqos->rgmii_clk_rate = RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ; + break; + } + + clk_set_rate(ethqos->rgmii_clk, ethqos->rgmii_clk_rate); +} + +static void ethqos_set_func_clk_en(struct qcom_ethqos *ethqos) +{ + rgmii_updatel(ethqos, RGMII_CONFIG_FUNC_CLK_EN, + RGMII_CONFIG_FUNC_CLK_EN, RGMII_IO_MACRO_CONFIG); +} + +static const struct ethqos_emac_por emac_v2_3_0_por[] = { + { .offset = RGMII_IO_MACRO_CONFIG, .value = 0x00C01343 }, + { .offset = SDCC_HC_REG_DLL_CONFIG, .value = 0x2004642C }, + { .offset = SDCC_HC_REG_DDR_CONFIG, .value = 0x00000000 }, + { .offset = SDCC_HC_REG_DLL_CONFIG2, .value = 0x00200000 }, + { .offset = SDCC_USR_CTL, .value = 0x00010800 }, + { .offset = RGMII_IO_MACRO_CONFIG2, .value = 0x00002060 }, +}; + +static int ethqos_dll_configure(struct qcom_ethqos *ethqos) +{ + unsigned int val; + int retry = 1000; + + /* Set CDR_EN */ + rgmii_updatel(ethqos, SDCC_DLL_CONFIG_CDR_EN, + SDCC_DLL_CONFIG_CDR_EN, SDCC_HC_REG_DLL_CONFIG); + + /* Set CDR_EXT_EN */ + rgmii_updatel(ethqos, SDCC_DLL_CONFIG_CDR_EXT_EN, + SDCC_DLL_CONFIG_CDR_EXT_EN, SDCC_HC_REG_DLL_CONFIG); + + /* Clear CK_OUT_EN */ + rgmii_updatel(ethqos, SDCC_DLL_CONFIG_CK_OUT_EN, + 0, SDCC_HC_REG_DLL_CONFIG); + + /* Set DLL_EN */ + rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_EN, + SDCC_DLL_CONFIG_DLL_EN, SDCC_HC_REG_DLL_CONFIG); + + rgmii_updatel(ethqos, SDCC_DLL_MCLK_GATING_EN, + 0, SDCC_HC_REG_DLL_CONFIG); + + rgmii_updatel(ethqos, SDCC_DLL_CDR_FINE_PHASE, + 0, SDCC_HC_REG_DLL_CONFIG); + + /* Wait for CK_OUT_EN clear */ + do { + val = rgmii_readl(ethqos, SDCC_HC_REG_DLL_CONFIG); + val &= SDCC_DLL_CONFIG_CK_OUT_EN; + if (!val) + break; + mdelay(1); + retry--; + } while (retry > 0); + if (!retry) + dev_err(ðqos->pdev->dev, "Clear CK_OUT_EN timedout\n"); + + /* Set CK_OUT_EN */ + rgmii_updatel(ethqos, SDCC_DLL_CONFIG_CK_OUT_EN, + SDCC_DLL_CONFIG_CK_OUT_EN, SDCC_HC_REG_DLL_CONFIG); + + /* Wait for CK_OUT_EN set */ + retry = 1000; + do { + val = rgmii_readl(ethqos, SDCC_HC_REG_DLL_CONFIG); + val &= SDCC_DLL_CONFIG_CK_OUT_EN; + if (val) + break; + mdelay(1); + retry--; + } while (retry > 0); + if (!retry) + dev_err(ðqos->pdev->dev, "Set CK_OUT_EN timedout\n"); + + /* Set DDR_CAL_EN */ + rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_CAL_EN, + SDCC_DLL_CONFIG2_DDR_CAL_EN, SDCC_HC_REG_DLL_CONFIG2); + + rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DLL_CLOCK_DIS, + 0, SDCC_HC_REG_DLL_CONFIG2); + + rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_MCLK_FREQ_CALC, + 0x1A << 10, SDCC_HC_REG_DLL_CONFIG2); + + rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SEL, + BIT(2), SDCC_HC_REG_DLL_CONFIG2); + + rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SW, + SDCC_DLL_CONFIG2_DDR_TRAFFIC_INIT_SW, + SDCC_HC_REG_DLL_CONFIG2); + + return 0; +} + +static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) +{ + /* Disable loopback mode */ + rgmii_updatel(ethqos, RGMII_CONFIG2_TX_TO_RX_LOOPBACK_EN, + 0, RGMII_IO_MACRO_CONFIG2); + + /* Select RGMII, write 0 to interface select */ + rgmii_updatel(ethqos, RGMII_CONFIG_INTF_SEL, + 0, RGMII_IO_MACRO_CONFIG); + + switch (ethqos->speed) { + case SPEED_1000: + rgmii_updatel(ethqos, RGMII_CONFIG_DDR_MODE, + RGMII_CONFIG_DDR_MODE, RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN, + 0, RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL, + RGMII_CONFIG_POS_NEG_DATA_SEL, + RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG_PROG_SWAP, + RGMII_CONFIG_PROG_SWAP, RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL, + 0, RGMII_IO_MACRO_CONFIG2); + rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, + RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, + RGMII_IO_MACRO_CONFIG2); + rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, + 0, RGMII_IO_MACRO_CONFIG2); + rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, + RGMII_CONFIG2_RX_PROG_SWAP, + RGMII_IO_MACRO_CONFIG2); + + /* Set PRG_RCLK_DLY to 57 for 1.8 ns delay */ + rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_RCLK_DLY, + 57, SDCC_HC_REG_DDR_CONFIG); + rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_DLY_EN, + SDCC_DDR_CONFIG_PRG_DLY_EN, + SDCC_HC_REG_DDR_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, + RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG); + break; + + case SPEED_100: + rgmii_updatel(ethqos, RGMII_CONFIG_DDR_MODE, + RGMII_CONFIG_DDR_MODE, RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN, + RGMII_CONFIG_BYPASS_TX_ID_EN, + RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL, + 0, RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG_PROG_SWAP, + 0, RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL, + 0, RGMII_IO_MACRO_CONFIG2); + rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, + RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, + RGMII_IO_MACRO_CONFIG2); + rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_2, + BIT(6), RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, + 0, RGMII_IO_MACRO_CONFIG2); + rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, + 0, RGMII_IO_MACRO_CONFIG2); + /* Write 0x5 to PRG_RCLK_DLY_CODE */ + rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, + (BIT(29) | BIT(27)), SDCC_HC_REG_DDR_CONFIG); + rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY, + SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY, + SDCC_HC_REG_DDR_CONFIG); + rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, + SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, + SDCC_HC_REG_DDR_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, + RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG); + break; + + case SPEED_10: + rgmii_updatel(ethqos, RGMII_CONFIG_DDR_MODE, + RGMII_CONFIG_DDR_MODE, RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG_BYPASS_TX_ID_EN, + RGMII_CONFIG_BYPASS_TX_ID_EN, + RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG_POS_NEG_DATA_SEL, + 0, RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG_PROG_SWAP, + 0, RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL, + 0, RGMII_IO_MACRO_CONFIG2); + rgmii_updatel(ethqos, RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN, + 0, RGMII_IO_MACRO_CONFIG2); + rgmii_updatel(ethqos, RGMII_CONFIG_MAX_SPD_PRG_9, + BIT(12) | GENMASK(9, 8), + RGMII_IO_MACRO_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, + 0, RGMII_IO_MACRO_CONFIG2); + rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, + 0, RGMII_IO_MACRO_CONFIG2); + /* Write 0x5 to PRG_RCLK_DLY_CODE */ + rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_CODE, + (BIT(29) | BIT(27)), SDCC_HC_REG_DDR_CONFIG); + rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY, + SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY, + SDCC_HC_REG_DDR_CONFIG); + rgmii_updatel(ethqos, SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, + SDCC_DDR_CONFIG_EXT_PRG_RCLK_DLY_EN, + SDCC_HC_REG_DDR_CONFIG); + rgmii_updatel(ethqos, RGMII_CONFIG_LOOPBACK_EN, + RGMII_CONFIG_LOOPBACK_EN, RGMII_IO_MACRO_CONFIG); + break; + default: + dev_err(ðqos->pdev->dev, + "Invalid speed %d\n", ethqos->speed); + return -EINVAL; + } + + return 0; +} + +static int ethqos_configure(struct qcom_ethqos *ethqos) +{ + volatile unsigned int dll_lock; + unsigned int i, retry = 1000; + + /* Reset to POR values and enable clk */ + for (i = 0; i < ethqos->num_por; i++) + rgmii_writel(ethqos, ethqos->por[i].value, + ethqos->por[i].offset); + ethqos_set_func_clk_en(ethqos); + + /* Initialize the DLL first */ + + /* Set DLL_RST */ + rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_RST, + SDCC_DLL_CONFIG_DLL_RST, SDCC_HC_REG_DLL_CONFIG); + + /* Set PDN */ + rgmii_updatel(ethqos, SDCC_DLL_CONFIG_PDN, + SDCC_DLL_CONFIG_PDN, SDCC_HC_REG_DLL_CONFIG); + + /* Clear DLL_RST */ + rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_RST, 0, + SDCC_HC_REG_DLL_CONFIG); + + /* Clear PDN */ + rgmii_updatel(ethqos, SDCC_DLL_CONFIG_PDN, 0, + SDCC_HC_REG_DLL_CONFIG); + + if (ethqos->speed != SPEED_100 && ethqos->speed != SPEED_10) { + /* Set DLL_EN */ + rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_EN, + SDCC_DLL_CONFIG_DLL_EN, SDCC_HC_REG_DLL_CONFIG); + + /* Set CK_OUT_EN */ + rgmii_updatel(ethqos, SDCC_DLL_CONFIG_CK_OUT_EN, + SDCC_DLL_CONFIG_CK_OUT_EN, + SDCC_HC_REG_DLL_CONFIG); + + /* Set USR_CTL bit 26 with mask of 3 bits */ + rgmii_updatel(ethqos, GENMASK(26, 24), BIT(26), SDCC_USR_CTL); + + /* wait for DLL LOCK */ + do { + mdelay(1); + dll_lock = rgmii_readl(ethqos, SDC4_STATUS); + if (dll_lock & SDC4_STATUS_DLL_LOCK) + break; + } while (retry > 0); + if (!retry) + dev_err(ðqos->pdev->dev, + "Timeout while waiting for DLL lock\n"); + } + + if (ethqos->speed == SPEED_1000) + ethqos_dll_configure(ethqos); + + ethqos_rgmii_macro_init(ethqos); + + return 0; +} + +static void ethqos_fix_mac_speed(void *priv, unsigned int speed) +{ + struct qcom_ethqos *ethqos = priv; + + ethqos->speed = speed; + ethqos_update_rgmii_clk(ethqos, speed); + ethqos_configure(ethqos); +} + +static int qcom_ethqos_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct qcom_ethqos *ethqos; + struct resource *res; + int ret; + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return ret; + + plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + if (IS_ERR(plat_dat)) { + dev_err(&pdev->dev, "dt configuration failed\n"); + return PTR_ERR(plat_dat); + } + + ethqos = devm_kzalloc(&pdev->dev, sizeof(*ethqos), GFP_KERNEL); + if (!ethqos) { + ret = -ENOMEM; + goto err_mem; + } + + ethqos->pdev = pdev; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rgmii"); + ethqos->rgmii_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ethqos->rgmii_base)) { + dev_err(&pdev->dev, "Can't get rgmii base\n"); + ret = PTR_ERR(ethqos->rgmii_base); + goto err_mem; + } + + ethqos->por = of_device_get_match_data(&pdev->dev); + + ethqos->rgmii_clk = devm_clk_get(&pdev->dev, "rgmii"); + if (IS_ERR(ethqos->rgmii_clk)) { + ret = PTR_ERR(ethqos->rgmii_clk); + goto err_mem; + } + + ret = clk_prepare_enable(ethqos->rgmii_clk); + if (ret) + goto err_mem; + + ethqos->speed = SPEED_1000; + ethqos_update_rgmii_clk(ethqos, SPEED_1000); + ethqos_set_func_clk_en(ethqos); + + plat_dat->bsp_priv = ethqos; + plat_dat->fix_mac_speed = ethqos_fix_mac_speed; + plat_dat->has_gmac4 = 1; + plat_dat->pmt = 1; + plat_dat->tso_en = of_property_read_bool(np, "snps,tso"); + + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + if (ret) + goto err_clk; + + rgmii_dump(ethqos); + + return ret; + +err_clk: + clk_disable_unprepare(ethqos->rgmii_clk); + +err_mem: + stmmac_remove_config_dt(pdev, plat_dat); + + return ret; +} + +static int qcom_ethqos_remove(struct platform_device *pdev) +{ + struct qcom_ethqos *ethqos; + int ret; + + ethqos = get_stmmac_bsp_priv(&pdev->dev); + if (!ethqos) + return -ENODEV; + + ret = stmmac_pltfr_remove(pdev); + clk_disable_unprepare(ethqos->rgmii_clk); + + return ret; +} + +static const struct of_device_id qcom_ethqos_match[] = { + { .compatible = "qcom,qcs404-ethqos", .data = &emac_v2_3_0_por}, + { } +}; +MODULE_DEVICE_TABLE(of, qcom_ethqos_match); + +static struct platform_driver qcom_ethqos_driver = { + .probe = qcom_ethqos_probe, + .remove = qcom_ethqos_remove, + .driver = { + .name = "qcom-ethqos", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = of_match_ptr(qcom_ethqos_match), + }, +}; +module_platform_driver(qcom_ethqos_driver); + +MODULE_DESCRIPTION("Qualcomm ETHQOS driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 7b923362ee55..3b174eae77c1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1342,8 +1342,10 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) } ret = phy_power_on(bsp_priv, true); - if (ret) + if (ret) { + gmac_clk_enable(bsp_priv, false); return ret; + } pm_runtime_enable(dev); pm_runtime_get_sync(dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 63e1064b27a2..f9b42b0c20f4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -28,6 +28,7 @@ #include #include "common.h" #include +#include #include struct stmmac_resources { @@ -174,6 +175,7 @@ struct stmmac_priv { unsigned int mode; unsigned int chain_mode; int extend_desc; + struct hwtstamp_config tstamp_config; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_ops; unsigned int default_addend; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5afba69981cf..517555bcfa44 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -534,7 +534,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, } /** - * stmmac_hwtstamp_ioctl - control hardware timestamping. + * stmmac_hwtstamp_set - control hardware timestamping. * @dev: device pointer. * @ifr: An IOCTL specific structure, that can contain a pointer to * a proprietary structure used to pass information to the driver. @@ -544,7 +544,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, * Return Value: * 0 on success and an appropriate -ve integer on failure. */ -static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) +static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) { struct stmmac_priv *priv = netdev_priv(dev); struct hwtstamp_config config; @@ -573,7 +573,7 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) } if (copy_from_user(&config, ifr->ifr_data, - sizeof(struct hwtstamp_config))) + sizeof(config))) return -EFAULT; netdev_dbg(priv->dev, "%s config flags:0x%x, tx_type:0x%x, rx_filter:0x%x\n", @@ -765,8 +765,31 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) (u32)now.tv_sec, now.tv_nsec); } + memcpy(&priv->tstamp_config, &config, sizeof(config)); + return copy_to_user(ifr->ifr_data, &config, - sizeof(struct hwtstamp_config)) ? -EFAULT : 0; + sizeof(config)) ? -EFAULT : 0; +} + +/** + * stmmac_hwtstamp_get - read hardware timestamping. + * @dev: device pointer. + * @ifr: An IOCTL specific structure, that can contain a pointer to + * a proprietary structure used to pass information to the driver. + * Description: + * This function obtain the current hardware timestamping settings + as requested. + */ +static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) +{ + struct stmmac_priv *priv = netdev_priv(dev); + struct hwtstamp_config *config = &priv->tstamp_config; + + if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) + return -EOPNOTSUPP; + + return copy_to_user(ifr->ifr_data, config, + sizeof(*config)) ? -EFAULT : 0; } /** @@ -3767,7 +3790,10 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) ret = phy_mii_ioctl(dev->phydev, rq, cmd); break; case SIOCSHWTSTAMP: - ret = stmmac_hwtstamp_ioctl(dev, rq); + ret = stmmac_hwtstamp_set(dev, rq); + break; + case SIOCGHWTSTAMP: + ret = stmmac_hwtstamp_get(dev, rq); break; default: break; diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 9020b084b953..7ec4eb74fe21 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -1,22 +1,9 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0+ /* cassini.c: Sun Microsystems Cassini(+) ethernet driver. * * Copyright (C) 2004 Sun Microsystems Inc. * Copyright (C) 2003 Adrian Sun (asun@darksunrising.com) * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * * This driver uses the sungem driver (c) David Miller * (davem@redhat.com) as its basis. * diff --git a/drivers/net/ethernet/sun/cassini.h b/drivers/net/ethernet/sun/cassini.h index 13f3860496a8..ae5f05f03f88 100644 --- a/drivers/net/ethernet/sun/cassini.h +++ b/drivers/net/ethernet/sun/cassini.h @@ -1,23 +1,10 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0+ */ /* $Id: cassini.h,v 1.16 2004/08/17 21:15:16 zaumen Exp $ * cassini.h: Definitions for Sun Microsystems Cassini(+) ethernet driver. * * Copyright (C) 2004 Sun Microsystems Inc. * Copyright (c) 2003 Adrian Sun (asun@darksunrising.com) * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - * * vendor id: 0x108E (Sun Microsystems, Inc.) * device id: 0xabba (Cassini) * revision ids: 0x01 = Cassini diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 810dfc7de1f9..e2d47b24a869 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -608,7 +608,7 @@ static void cpmac_end_xmit(struct net_device *dev, int queue) netdev_dbg(dev, "sent 0x%p, len=%d\n", desc->skb, desc->skb->len); - dev_kfree_skb_irq(desc->skb); + dev_consume_skb_irq(desc->skb); desc->skb = NULL; if (__netif_subqueue_stopped(dev, queue)) netif_wake_subqueue(dev, queue); diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index ef6f766f6389..e859ae2e42d5 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -144,6 +144,8 @@ struct hv_netvsc_packet { u32 total_data_buflen; }; +#define NETVSC_HASH_KEYLEN 40 + struct netvsc_device_info { unsigned char mac_adr[ETH_ALEN]; u32 num_chn; @@ -151,6 +153,8 @@ struct netvsc_device_info { u32 recv_sections; u32 send_section_size; u32 recv_section_size; + + u8 rss_key[NETVSC_HASH_KEYLEN]; }; enum rndis_device_state { @@ -160,8 +164,6 @@ enum rndis_device_state { RNDIS_DEV_DATAINITIALIZED, }; -#define NETVSC_HASH_KEYLEN 40 - struct rndis_device { struct net_device *ndev; @@ -209,7 +211,9 @@ int netvsc_recv_callback(struct net_device *net, void netvsc_channel_cb(void *context); int netvsc_poll(struct napi_struct *napi, int budget); -int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev); +int rndis_set_subchannel(struct net_device *ndev, + struct netvsc_device *nvdev, + struct netvsc_device_info *dev_info); int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, @@ -1177,7 +1181,7 @@ enum ndis_per_pkt_info_type { enum rndis_per_pkt_info_interal_type { RNDIS_PKTINFO_ID = 1, - /* Add more memebers here */ + /* Add more members here */ RNDIS_PKTINFO_MAX }; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 922054c1d544..813d195bbd57 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -84,7 +84,7 @@ static void netvsc_subchan_work(struct work_struct *w) rdev = nvdev->extension; if (rdev) { - ret = rndis_set_subchannel(rdev->ndev, nvdev); + ret = rndis_set_subchannel(rdev->ndev, nvdev, NULL); if (ret == 0) { netif_device_attach(rdev->ndev); } else { @@ -1331,7 +1331,7 @@ void netvsc_channel_cb(void *context) prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index); if (napi_schedule_prep(&nvchan->napi)) { - /* disable interupts from host */ + /* disable interrupts from host */ hv_begin_read(rbi); __napi_schedule_irqoff(&nvchan->napi); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 91ed15ea5883..256adbd044f5 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -370,7 +370,7 @@ static u32 fill_pg_buf(struct page *page, u32 offset, u32 len, { int j = 0; - /* Deal with compund pages by ignoring unused part + /* Deal with compound pages by ignoring unused part * of the page. */ page += (offset >> PAGE_SHIFT); @@ -858,6 +858,39 @@ static void netvsc_get_channels(struct net_device *net, } } +/* Alloc struct netvsc_device_info, and initialize it from either existing + * struct netvsc_device, or from default values. + */ +static struct netvsc_device_info *netvsc_devinfo_get + (struct netvsc_device *nvdev) +{ + struct netvsc_device_info *dev_info; + + dev_info = kzalloc(sizeof(*dev_info), GFP_ATOMIC); + + if (!dev_info) + return NULL; + + if (nvdev) { + dev_info->num_chn = nvdev->num_chn; + dev_info->send_sections = nvdev->send_section_cnt; + dev_info->send_section_size = nvdev->send_section_size; + dev_info->recv_sections = nvdev->recv_section_cnt; + dev_info->recv_section_size = nvdev->recv_section_size; + + memcpy(dev_info->rss_key, nvdev->extension->rss_key, + NETVSC_HASH_KEYLEN); + } else { + dev_info->num_chn = VRSS_CHANNEL_DEFAULT; + dev_info->send_sections = NETVSC_DEFAULT_TX; + dev_info->send_section_size = NETVSC_SEND_SECTION_SIZE; + dev_info->recv_sections = NETVSC_DEFAULT_RX; + dev_info->recv_section_size = NETVSC_RECV_SECTION_SIZE; + } + + return dev_info; +} + static int netvsc_detach(struct net_device *ndev, struct netvsc_device *nvdev) { @@ -909,7 +942,7 @@ static int netvsc_attach(struct net_device *ndev, return PTR_ERR(nvdev); if (nvdev->num_chn > 1) { - ret = rndis_set_subchannel(ndev, nvdev); + ret = rndis_set_subchannel(ndev, nvdev, dev_info); /* if unavailable, just proceed with one queue */ if (ret) { @@ -943,7 +976,7 @@ static int netvsc_set_channels(struct net_device *net, struct net_device_context *net_device_ctx = netdev_priv(net); struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); unsigned int orig, count = channels->combined_count; - struct netvsc_device_info device_info; + struct netvsc_device_info *device_info; int ret; /* We do not support separate count for rx, tx, or other */ @@ -962,24 +995,26 @@ static int netvsc_set_channels(struct net_device *net, orig = nvdev->num_chn; - memset(&device_info, 0, sizeof(device_info)); - device_info.num_chn = count; - device_info.send_sections = nvdev->send_section_cnt; - device_info.send_section_size = nvdev->send_section_size; - device_info.recv_sections = nvdev->recv_section_cnt; - device_info.recv_section_size = nvdev->recv_section_size; + device_info = netvsc_devinfo_get(nvdev); + + if (!device_info) + return -ENOMEM; + + device_info->num_chn = count; ret = netvsc_detach(net, nvdev); if (ret) - return ret; + goto out; - ret = netvsc_attach(net, &device_info); + ret = netvsc_attach(net, device_info); if (ret) { - device_info.num_chn = orig; - if (netvsc_attach(net, &device_info)) + device_info->num_chn = orig; + if (netvsc_attach(net, device_info)) netdev_err(net, "restoring channel setting failed\n"); } +out: + kfree(device_info); return ret; } @@ -1048,48 +1083,45 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev); struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); int orig_mtu = ndev->mtu; - struct netvsc_device_info device_info; + struct netvsc_device_info *device_info; int ret = 0; if (!nvdev || nvdev->destroy) return -ENODEV; + device_info = netvsc_devinfo_get(nvdev); + + if (!device_info) + return -ENOMEM; + /* Change MTU of underlying VF netdev first. */ if (vf_netdev) { ret = dev_set_mtu(vf_netdev, mtu); if (ret) - return ret; + goto out; } - memset(&device_info, 0, sizeof(device_info)); - device_info.num_chn = nvdev->num_chn; - device_info.send_sections = nvdev->send_section_cnt; - device_info.send_section_size = nvdev->send_section_size; - device_info.recv_sections = nvdev->recv_section_cnt; - device_info.recv_section_size = nvdev->recv_section_size; - ret = netvsc_detach(ndev, nvdev); if (ret) goto rollback_vf; ndev->mtu = mtu; - ret = netvsc_attach(ndev, &device_info); - if (ret) - goto rollback; + ret = netvsc_attach(ndev, device_info); + if (!ret) + goto out; - return 0; - -rollback: /* Attempt rollback to original MTU */ ndev->mtu = orig_mtu; - if (netvsc_attach(ndev, &device_info)) + if (netvsc_attach(ndev, device_info)) netdev_err(ndev, "restoring mtu failed\n"); rollback_vf: if (vf_netdev) dev_set_mtu(vf_netdev, orig_mtu); +out: + kfree(device_info); return ret; } @@ -1674,7 +1706,7 @@ static int netvsc_set_ringparam(struct net_device *ndev, { struct net_device_context *ndevctx = netdev_priv(ndev); struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); - struct netvsc_device_info device_info; + struct netvsc_device_info *device_info; struct ethtool_ringparam orig; u32 new_tx, new_rx; int ret = 0; @@ -1694,26 +1726,29 @@ static int netvsc_set_ringparam(struct net_device *ndev, new_rx == orig.rx_pending) return 0; /* no change */ - memset(&device_info, 0, sizeof(device_info)); - device_info.num_chn = nvdev->num_chn; - device_info.send_sections = new_tx; - device_info.send_section_size = nvdev->send_section_size; - device_info.recv_sections = new_rx; - device_info.recv_section_size = nvdev->recv_section_size; + device_info = netvsc_devinfo_get(nvdev); + + if (!device_info) + return -ENOMEM; + + device_info->send_sections = new_tx; + device_info->recv_sections = new_rx; ret = netvsc_detach(ndev, nvdev); if (ret) - return ret; + goto out; - ret = netvsc_attach(ndev, &device_info); + ret = netvsc_attach(ndev, device_info); if (ret) { - device_info.send_sections = orig.tx_pending; - device_info.recv_sections = orig.rx_pending; + device_info->send_sections = orig.tx_pending; + device_info->recv_sections = orig.rx_pending; - if (netvsc_attach(ndev, &device_info)) + if (netvsc_attach(ndev, device_info)) netdev_err(ndev, "restoring ringparam failed"); } +out: + kfree(device_info); return ret; } @@ -2088,7 +2123,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev)) return NOTIFY_DONE; - /* if syntihetic interface is a different namespace, + /* if synthetic interface is a different namespace, * then move the VF to that namespace; join will be * done again in that context. */ @@ -2167,7 +2202,7 @@ static int netvsc_probe(struct hv_device *dev, { struct net_device *net = NULL; struct net_device_context *net_device_ctx; - struct netvsc_device_info device_info; + struct netvsc_device_info *device_info = NULL; struct netvsc_device *nvdev; int ret = -ENOMEM; @@ -2214,21 +2249,21 @@ static int netvsc_probe(struct hv_device *dev, netif_set_real_num_rx_queues(net, 1); /* Notify the netvsc driver of the new device */ - memset(&device_info, 0, sizeof(device_info)); - device_info.num_chn = VRSS_CHANNEL_DEFAULT; - device_info.send_sections = NETVSC_DEFAULT_TX; - device_info.send_section_size = NETVSC_SEND_SECTION_SIZE; - device_info.recv_sections = NETVSC_DEFAULT_RX; - device_info.recv_section_size = NETVSC_RECV_SECTION_SIZE; + device_info = netvsc_devinfo_get(NULL); - nvdev = rndis_filter_device_add(dev, &device_info); + if (!device_info) { + ret = -ENOMEM; + goto devinfo_failed; + } + + nvdev = rndis_filter_device_add(dev, device_info); if (IS_ERR(nvdev)) { ret = PTR_ERR(nvdev); netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); goto rndis_failed; } - memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + memcpy(net->dev_addr, device_info->mac_adr, ETH_ALEN); /* We must get rtnl lock before scheduling nvdev->subchan_work, * otherwise netvsc_subchan_work() can get rtnl lock first and wait @@ -2236,7 +2271,7 @@ static int netvsc_probe(struct hv_device *dev, * netvsc_probe() can't get rtnl lock and as a result vmbus_onoffer() * -> ... -> device_add() -> ... -> __device_attach() can't get * the device lock, so all the subchannels can't be processed -- - * finally netvsc_subchan_work() hangs for ever. + * finally netvsc_subchan_work() hangs forever. */ rtnl_lock(); @@ -2266,12 +2301,16 @@ static int netvsc_probe(struct hv_device *dev, list_add(&net_device_ctx->list, &netvsc_dev_list); rtnl_unlock(); + + kfree(device_info); return 0; register_failed: rtnl_unlock(); rndis_filter_device_remove(dev, nvdev); rndis_failed: + kfree(device_info); +devinfo_failed: free_percpu(net_device_ctx->vf_stats); no_stats: hv_set_drvdata(dev, NULL); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 8b537a049c1e..73b60592de06 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -774,8 +774,8 @@ cleanup: return ret; } -int rndis_filter_set_rss_param(struct rndis_device *rdev, - const u8 *rss_key) +static int rndis_set_rss_param_msg(struct rndis_device *rdev, + const u8 *rss_key, u16 flag) { struct net_device *ndev = rdev->ndev; struct rndis_request *request; @@ -804,7 +804,7 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev, rssp->hdr.type = NDIS_OBJECT_TYPE_RSS_PARAMETERS; rssp->hdr.rev = NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; rssp->hdr.size = sizeof(struct ndis_recv_scale_param); - rssp->flag = 0; + rssp->flag = flag; rssp->hashinfo = NDIS_HASH_FUNC_TOEPLITZ | NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6; @@ -829,9 +829,12 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev, wait_for_completion(&request->wait_event); set_complete = &request->response_msg.msg.set_complete; - if (set_complete->status == RNDIS_STATUS_SUCCESS) - memcpy(rdev->rss_key, rss_key, NETVSC_HASH_KEYLEN); - else { + if (set_complete->status == RNDIS_STATUS_SUCCESS) { + if (!(flag & NDIS_RSS_PARAM_FLAG_DISABLE_RSS) && + !(flag & NDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED)) + memcpy(rdev->rss_key, rss_key, NETVSC_HASH_KEYLEN); + + } else { netdev_err(ndev, "Fail to set RSS parameters:0x%x\n", set_complete->status); ret = -EINVAL; @@ -842,6 +845,16 @@ cleanup: return ret; } +int rndis_filter_set_rss_param(struct rndis_device *rdev, + const u8 *rss_key) +{ + /* Disable RSS before change */ + rndis_set_rss_param_msg(rdev, rss_key, + NDIS_RSS_PARAM_FLAG_DISABLE_RSS); + + return rndis_set_rss_param_msg(rdev, rss_key, 0); +} + static int rndis_filter_query_device_link_status(struct rndis_device *dev, struct netvsc_device *net_device) { @@ -1121,7 +1134,9 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) * This breaks overlap of processing the host message for the * new primary channel with the initialization of sub-channels. */ -int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev) +int rndis_set_subchannel(struct net_device *ndev, + struct netvsc_device *nvdev, + struct netvsc_device_info *dev_info) { struct nvsp_message *init_packet = &nvdev->channel_init_pkt; struct net_device_context *ndev_ctx = netdev_priv(ndev); @@ -1161,8 +1176,11 @@ int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev) wait_event(nvdev->subchan_open, atomic_read(&nvdev->open_chn) == nvdev->num_chn); - /* ignore failues from setting rss parameters, still have channels */ - rndis_filter_set_rss_param(rdev, netvsc_hash_key); + /* ignore failures from setting rss parameters, still have channels */ + if (dev_info) + rndis_filter_set_rss_param(rdev, dev_info->rss_key); + else + rndis_filter_set_rss_param(rdev, netvsc_hash_key); netif_set_real_num_tx_queues(ndev, nvdev->num_chn); netif_set_real_num_rx_queues(ndev, nvdev->num_chn); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 084a1b3fbc80..26e53832b095 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -119,8 +119,6 @@ static struct macvlan_port *macvlan_port_get_rtnl(const struct net_device *dev) return rtnl_dereference(dev->rx_handler_data); } -#define macvlan_port_exists(dev) (dev->priv_flags & IFF_MACVLAN_PORT) - static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port, const unsigned char *addr) { @@ -337,7 +335,7 @@ static void macvlan_process_broadcast(struct work_struct *w) if (src) dev_put(src->dev); - kfree_skb(skb); + consume_skb(skb); } } @@ -1378,7 +1376,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, if (!tb[IFLA_ADDRESS]) eth_hw_addr_random(dev); - if (!macvlan_port_exists(lowerdev)) { + if (!netif_is_macvlan_port(lowerdev)) { err = macvlan_port_create(lowerdev); if (err < 0) return err; @@ -1638,7 +1636,7 @@ static int macvlan_device_event(struct notifier_block *unused, struct macvlan_port *port; LIST_HEAD(list_kill); - if (!macvlan_port_exists(dev)) + if (!netif_is_macvlan_port(dev)) return NOTIFY_DONE; port = macvlan_port_get_rtnl(dev); diff --git a/drivers/net/phy/amd.c b/drivers/net/phy/amd.c index 9d0504f3e3b2..65b4b0960b1e 100644 --- a/drivers/net/phy/amd.c +++ b/drivers/net/phy/amd.c @@ -1,15 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Driver for AMD am79c PHYs * * Author: Heiko Schocher * * Copyright (c) 2011 DENX Software Engineering GmbH - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include #include diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c index beb3309bb0f0..0578fe75d3b6 100644 --- a/drivers/net/phy/aquantia.c +++ b/drivers/net/phy/aquantia.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for Aquantia PHY * * Author: Shaohui Xie * * Copyright 2015 Freescale Semiconductor, Inc. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. */ #include diff --git a/drivers/net/phy/asix.c b/drivers/net/phy/asix.c index 8ebe7f5484ae..f14ba5366b91 100644 --- a/drivers/net/phy/asix.c +++ b/drivers/net/phy/asix.c @@ -1,13 +1,7 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: GPL-2.0+ /* Driver for Asix PHYs * * Author: Michael Schmitz - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include #include diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index f9432d053a22..90dc62c15fc5 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -1,14 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/at803x.c * * Driver for Atheros 803x PHY * * Author: Matus Ujhelyi - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. */ #include @@ -39,9 +35,6 @@ #define AT803X_LOC_MAC_ADDR_0_15_OFFSET 0x804C #define AT803X_LOC_MAC_ADDR_16_31_OFFSET 0x804B #define AT803X_LOC_MAC_ADDR_32_47_OFFSET 0x804A -#define AT803X_MMD_ACCESS_CONTROL 0x0D -#define AT803X_MMD_ACCESS_CONTROL_DATA 0x0E -#define AT803X_FUNC_DATA 0x4003 #define AT803X_REG_CHIP_CONFIG 0x1f #define AT803X_BT_BX_REG_SEL 0x8000 @@ -110,16 +103,16 @@ static int at803x_debug_reg_mask(struct phy_device *phydev, u16 reg, return phy_write(phydev, AT803X_DEBUG_DATA, val); } -static inline int at803x_enable_rx_delay(struct phy_device *phydev) +static inline int at803x_disable_rx_delay(struct phy_device *phydev) { - return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, 0, - AT803X_DEBUG_RX_CLK_DLY_EN); + return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, + AT803X_DEBUG_RX_CLK_DLY_EN, 0); } -static inline int at803x_enable_tx_delay(struct phy_device *phydev) +static inline int at803x_disable_tx_delay(struct phy_device *phydev) { - return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_5, 0, - AT803X_DEBUG_TX_CLK_DLY_EN); + return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_5, + AT803X_DEBUG_TX_CLK_DLY_EN, 0); } /* save relevant PHY registers to private copy */ @@ -168,16 +161,9 @@ static int at803x_set_wol(struct phy_device *phydev, if (!is_valid_ether_addr(mac)) return -EINVAL; - for (i = 0; i < 3; i++) { - phy_write(phydev, AT803X_MMD_ACCESS_CONTROL, - AT803X_DEVICE_ADDR); - phy_write(phydev, AT803X_MMD_ACCESS_CONTROL_DATA, - offsets[i]); - phy_write(phydev, AT803X_MMD_ACCESS_CONTROL, - AT803X_FUNC_DATA); - phy_write(phydev, AT803X_MMD_ACCESS_CONTROL_DATA, - mac[(i * 2) + 1] | (mac[(i * 2)] << 8)); - } + for (i = 0; i < 3; i++) + phy_write_mmd(phydev, AT803X_DEVICE_ADDR, offsets[i], + mac[(i * 2) + 1] | (mac[(i * 2)] << 8)); value = phy_read(phydev, AT803X_INTR_ENABLE); value |= AT803X_INTR_ENABLE_WOL; @@ -256,15 +242,17 @@ static int at803x_config_init(struct phy_device *phydev) return ret; if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) { - ret = at803x_enable_rx_delay(phydev); + phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII) { + ret = at803x_disable_rx_delay(phydev); if (ret < 0) return ret; } if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) { - ret = at803x_enable_tx_delay(phydev); + phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII) { + ret = at803x_disable_tx_delay(phydev); if (ret < 0) return ret; } diff --git a/drivers/net/phy/bcm-cygnus.c b/drivers/net/phy/bcm-cygnus.c index e757b09f1889..ab8e12922bf9 100644 --- a/drivers/net/phy/bcm-cygnus.c +++ b/drivers/net/phy/bcm-cygnus.c @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ /* Broadcom Cygnus SoC internal transceivers support. */ diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index e10e7b54ec4b..a75642051b8b 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015-2017 Broadcom - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include "bcm-phy-lib.h" diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h index 81cceaa412fe..17faaefcfd60 100644 --- a/drivers/net/phy/bcm-phy-lib.h +++ b/drivers/net/phy/bcm-phy-lib.h @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifndef _LINUX_BCM_PHY_LIB_H diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c index a88dd14a25c0..44e6cff419a0 100644 --- a/drivers/net/phy/bcm63xx.c +++ b/drivers/net/phy/bcm63xx.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Driver for Broadcom 63xx SOCs integrated PHYs - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include "bcm-phy-lib.h" #include diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 712224cc442d..b8415f8fae14 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Broadcom BCM7xxx internal transceivers support. * * Copyright (C) 2014-2017 Broadcom - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c index 1b350183bffb..f0c0eefe2202 100644 --- a/drivers/net/phy/bcm87xx.c +++ b/drivers/net/phy/bcm87xx.c @@ -1,8 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * * Copyright (C) 2011 - 2012 Cavium, Inc. */ @@ -197,6 +194,7 @@ static struct phy_driver bcm87xx_driver[] = { .phy_id = PHY_ID_BCM8706, .phy_id_mask = 0xffffffff, .name = "Broadcom BCM8706", + .features = PHY_10GBIT_FEC_FEATURES, .config_init = bcm87xx_config_init, .config_aneg = bcm87xx_config_aneg, .read_status = bcm87xx_read_status, @@ -208,6 +206,7 @@ static struct phy_driver bcm87xx_driver[] = { .phy_id = PHY_ID_BCM8727, .phy_id_mask = 0xffffffff, .name = "Broadcom BCM8727", + .features = PHY_10GBIT_FEC_FEATURES, .config_init = bcm87xx_config_init, .config_aneg = bcm87xx_config_aneg, .read_status = bcm87xx_read_status, @@ -219,4 +218,4 @@ static struct phy_driver bcm87xx_driver[] = { module_phy_driver(bcm87xx_driver); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index aa73c5cc5f86..9605d4fe540b 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/broadcom.c * @@ -7,11 +8,6 @@ * Copyright (c) 2006 Maciej W. Rozycki * * Inspired by code written by Amy Fong. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include "bcm-phy-lib.h" diff --git a/drivers/net/phy/cicada.c b/drivers/net/phy/cicada.c index fea61c81bda9..108ed24f8489 100644 --- a/drivers/net/phy/cicada.c +++ b/drivers/net/phy/cicada.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/cicada.c * @@ -6,12 +7,6 @@ * Author: Andy Fleming * * Copyright (c) 2004 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include #include diff --git a/drivers/net/phy/cortina.c b/drivers/net/phy/cortina.c index 8022cd317f62..c291dc014769 100644 --- a/drivers/net/phy/cortina.c +++ b/drivers/net/phy/cortina.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright 2017 NXP * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * * CORTINA is a registered trademark of Cortina Systems, Inc. * */ @@ -88,6 +79,7 @@ static struct phy_driver cortina_driver[] = { .phy_id = PHY_ID_CS4340, .phy_id_mask = 0xffffffff, .name = "Cortina CS4340", + .features = PHY_10GBIT_FEATURES, .config_init = gen10g_config_init, .config_aneg = gen10g_config_aneg, .read_status = cortina_read_status, diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c index 97162008f42b..bf39baa7f2c8 100644 --- a/drivers/net/phy/davicom.c +++ b/drivers/net/phy/davicom.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/davicom.c * @@ -6,12 +7,6 @@ * Author: Andy Fleming * * Copyright (c) 2004 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include #include diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 18b41bc345ab..25ef483bcc24 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1,21 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Driver for the National Semiconductor DP83640 PHYTER * * Copyright (C) 2010 OMICRON electronics GmbH - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 24c7f149f3e6..bbd8c22067f3 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for the Texas Instruments DP83822 PHY * * Copyright (C) 2017 Texas Instruments Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include @@ -338,4 +330,4 @@ MODULE_DEVICE_TABLE(mdio, dp83822_tbl); MODULE_DESCRIPTION("Texas Instruments DP83822 PHY driver"); MODULE_AUTHOR("Dan Murphy @@ -133,4 +125,4 @@ module_phy_driver(dp83848_driver); MODULE_DESCRIPTION("Texas Instruments DP83848 PHY driver"); MODULE_AUTHOR("Andrew F. Davis "); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index da6a67d47ce9..8a8d9f606b3e 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -1,16 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for the Texas Instruments DP83867 PHY * * Copyright (C) 2015 Texas Instruments Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include @@ -357,4 +349,4 @@ MODULE_DEVICE_TABLE(mdio, dp83867_tbl); MODULE_DESCRIPTION("Texas Instruments DP83867 PHY driver"); MODULE_AUTHOR("Dan Murphy #include diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 72d43c88e6ff..47a8cb574c45 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Fixed MDIO bus (MDIO bus emulation with fixed PHYs) * @@ -5,11 +6,6 @@ * Anton Vorontsov * * Copyright (c) 2006-2007 MontaVista Software, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. */ #include diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index 7d5938b87660..ebef8354bc81 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Driver for ICPlus PHYs * * Copyright (c) 2007 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include #include diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c index fc0f5024a29e..02d9713318b6 100644 --- a/drivers/net/phy/intel-xway.c +++ b/drivers/net/phy/intel-xway.c @@ -1,16 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2012 Daniel Schwierzeck * Copyright (C) 2016 Hauke Mehrtens - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c index c8bb29ae1a2a..a93d673baf35 100644 --- a/drivers/net/phy/lxt.c +++ b/drivers/net/phy/lxt.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/lxt.c * @@ -6,12 +7,6 @@ * Author: Andy Fleming * * Copyright (c) 2004 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include #include diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index a9c7c7f41b0c..90f44ba8aca7 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/marvell.c * @@ -8,12 +9,6 @@ * Copyright (c) 2004 Freescale Semiconductor, Inc. * * Copyright (c) 2013 Michael Stapelberg - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include #include @@ -1046,6 +1041,39 @@ static int m88e1145_config_init(struct phy_device *phydev) return 0; } +/* The VOD can be out of specification on link up. Poke an + * undocumented register, in an undocumented page, with a magic value + * to fix this. + */ +static int m88e6390_errata(struct phy_device *phydev) +{ + int err; + + err = phy_write(phydev, MII_BMCR, + BMCR_ANENABLE | BMCR_SPEED1000 | BMCR_FULLDPLX); + if (err) + return err; + + usleep_range(300, 400); + + err = phy_write_paged(phydev, 0xf8, 0x08, 0x36); + if (err) + return err; + + return genphy_soft_reset(phydev); +} + +static int m88e6390_config_aneg(struct phy_device *phydev) +{ + int err; + + err = m88e6390_errata(phydev); + if (err) + return err; + + return m88e1510_config_aneg(phydev); +} + /** * fiber_lpa_mod_linkmode_lpa_t * @advertising: the linkmode advertisement settings @@ -1402,7 +1430,7 @@ static int m88e1318_set_wol(struct phy_device *phydev, * before enabling it if !phy_interrupt_is_valid() */ if (!phy_interrupt_is_valid(phydev)) - phy_read(phydev, MII_M1011_IEVENT); + __phy_read(phydev, MII_M1011_IEVENT); /* Enable the WOL interrupt */ err = __phy_modify(phydev, MII_88E1318S_PHY_CSIER, 0, @@ -2283,7 +2311,7 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES, .probe = m88e6390_probe, .config_init = &marvell_config_init, - .config_aneg = &m88e1510_config_aneg, + .config_aneg = &m88e6390_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 82ab6ed3b74e..38cfc923a88a 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Marvell 10G 88x3310 PHY driver * diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c index 46fe1ae919a3..7d0f388d8db8 100644 --- a/drivers/net/phy/mdio-bcm-iproc.c +++ b/drivers/net/phy/mdio-bcm-iproc.c @@ -1,14 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015 Broadcom Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c index df75efa96a7d..8295bc7c8c20 100644 --- a/drivers/net/phy/mdio-bcm-unimac.c +++ b/drivers/net/phy/mdio-bcm-unimac.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Broadcom UniMAC MDIO bus controller driver * * Copyright (C) 2014-2017 Broadcom - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include diff --git a/drivers/net/phy/mdio-bitbang.c b/drivers/net/phy/mdio-bitbang.c index 15352f987bdf..5136275c8e73 100644 --- a/drivers/net/phy/mdio-bitbang.c +++ b/drivers/net/phy/mdio-bitbang.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Bitbanged MDIO support. * @@ -11,10 +12,6 @@ * * 2005 (c) MontaVista Software, Inc. * Vitaly Bordug - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. */ #include @@ -232,4 +229,4 @@ void free_mdio_bitbang(struct mii_bus *bus) } EXPORT_SYMBOL(free_mdio_bitbang); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c index 863496fa5d13..d9b54c67ef9f 100644 --- a/drivers/net/phy/mdio-boardinfo.c +++ b/drivers/net/phy/mdio-boardinfo.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * mdio-boardinfo - Collect pre-declarations for MDIO devices - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. */ #include diff --git a/drivers/net/phy/mdio-cavium.c b/drivers/net/phy/mdio-cavium.c index 6df2fa755bb4..1afd6fc1a351 100644 --- a/drivers/net/phy/mdio-cavium.c +++ b/drivers/net/phy/mdio-cavium.c @@ -1,8 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * * Copyright (C) 2009-2016 Cavium, Inc. */ @@ -150,4 +147,4 @@ EXPORT_SYMBOL(cavium_mdiobus_write); MODULE_DESCRIPTION("Common code for OCTEON and Thunder MDIO bus drivers"); MODULE_AUTHOR("David Daney"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/mdio-cavium.h b/drivers/net/phy/mdio-cavium.h index 4bccd45d24e2..ed5f9bb5448d 100644 --- a/drivers/net/phy/mdio-cavium.h +++ b/drivers/net/phy/mdio-cavium.h @@ -1,8 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * * Copyright (C) 2009-2016 Cavium, Inc. */ diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index ea9a0e339778..1b00235d7dc5 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * GPIO based MDIO bitbang driver. * Supports OpenFirmware. @@ -14,10 +15,6 @@ * * 2005 (c) MontaVista Software, Inc. * Vitaly Bordug - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. */ #include @@ -216,5 +213,5 @@ module_platform_driver(mdio_gpio_driver); MODULE_ALIAS("platform:mdio-gpio"); MODULE_AUTHOR("Laurent Pinchart, Paulius Zaleckas"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Generic driver for MDIO bus emulation using GPIO"); diff --git a/drivers/net/phy/mdio-hisi-femac.c b/drivers/net/phy/mdio-hisi-femac.c index b03fedd6c1d8..287f3ccf1da1 100644 --- a/drivers/net/phy/mdio-hisi-femac.c +++ b/drivers/net/phy/mdio-hisi-femac.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Hisilicon Fast Ethernet MDIO Bus Driver * * Copyright (c) 2016 HiSilicon Technologies Co., Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . */ #include @@ -163,4 +151,4 @@ module_platform_driver(hisi_femac_mdio_driver); MODULE_DESCRIPTION("Hisilicon Fast Ethernet MAC MDIO interface driver"); MODULE_AUTHOR("Dongpo Li "); -MODULE_LICENSE("GPL v2"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/mdio-i2c.c b/drivers/net/phy/mdio-i2c.c index 6d24fd13ca86..0dce67672548 100644 --- a/drivers/net/phy/mdio-i2c.c +++ b/drivers/net/phy/mdio-i2c.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * MDIO I2C bridge * * Copyright (C) 2015-2016 Russell King * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Network PHYs can appear on I2C buses when they are part of SFP module. * This driver exposes these PHYs to the networking PHY code, allowing * our PHY drivers access to these PHYs, and so allowing configuration diff --git a/drivers/net/phy/mdio-i2c.h b/drivers/net/phy/mdio-i2c.h index 889ab57d7f3e..751dab281f57 100644 --- a/drivers/net/phy/mdio-i2c.h +++ b/drivers/net/phy/mdio-i2c.h @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * MDIO I2C bridge * * Copyright (C) 2015 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef MDIO_I2C_H #define MDIO_I2C_H diff --git a/drivers/net/phy/mdio-moxart.c b/drivers/net/phy/mdio-moxart.c index 5bb56d126693..af3910fe8ec7 100644 --- a/drivers/net/phy/mdio-moxart.c +++ b/drivers/net/phy/mdio-moxart.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* MOXA ART Ethernet (RTL8201CP) MDIO interface driver * * Copyright (C) 2013 Jonas Jensen - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. */ #include @@ -190,4 +187,4 @@ module_platform_driver(moxart_mdio_driver); MODULE_DESCRIPTION("MOXA ART MDIO interface driver"); MODULE_AUTHOR("Jonas Jensen "); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c index 696bdf1e4576..88d409e48c1f 100644 --- a/drivers/net/phy/mdio-mux-bcm-iproc.c +++ b/drivers/net/phy/mdio-mux-bcm-iproc.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright 2016 Broadcom - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation (the "GPL"). - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 (GPLv2) for more details. - * - * You should have received a copy of the GNU General Public License - * version 2 (GPLv2) along with this source code. */ #include #include diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/phy/mdio-mux-gpio.c index fe34576262bd..6c8960df43b0 100644 --- a/drivers/net/phy/mdio-mux-gpio.c +++ b/drivers/net/phy/mdio-mux-gpio.c @@ -1,8 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * * Copyright (C) 2011, 2012 Cavium, Inc. */ @@ -103,4 +100,4 @@ module_platform_driver(mdio_mux_gpio_driver); MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_VERSION(DRV_VERSION); MODULE_AUTHOR("David Daney"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/mdio-mux-mmioreg.c b/drivers/net/phy/mdio-mux-mmioreg.c index 70f6115530af..d1a8780e24d8 100644 --- a/drivers/net/phy/mdio-mux-mmioreg.c +++ b/drivers/net/phy/mdio-mux-mmioreg.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Simple memory-mapped device MDIO MUX driver * * Author: Timur Tabi * * Copyright 2012 Freescale Semiconductor, Inc. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. */ #include diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 0a86f1e4c02f..6a1d3540210b 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -1,8 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * * Copyright (C) 2011, 2012 Cavium, Inc. */ @@ -210,4 +207,4 @@ EXPORT_SYMBOL_GPL(mdio_mux_uninit); MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_AUTHOR("David Daney"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/mdio-octeon.c b/drivers/net/phy/mdio-octeon.c index ab6914f8bd50..8327382aa568 100644 --- a/drivers/net/phy/mdio-octeon.c +++ b/drivers/net/phy/mdio-octeon.c @@ -1,8 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * * Copyright (C) 2009-2015 Cavium, Inc. */ @@ -122,4 +119,4 @@ module_platform_driver(octeon_mdiobus_driver); MODULE_DESCRIPTION("Cavium OCTEON MDIO bus driver"); MODULE_AUTHOR("David Daney"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c index 6425ce04d3f9..20ffd8fb79ce 100644 --- a/drivers/net/phy/mdio-sun4i.c +++ b/drivers/net/phy/mdio-sun4i.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Allwinner EMAC MDIO interface driver * @@ -6,10 +7,6 @@ * * Based on the Linux driver provided by Allwinner: * Copyright (C) 1997 Sten Wang - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. */ #include @@ -179,4 +176,4 @@ module_platform_driver(sun4i_mdio_driver); MODULE_DESCRIPTION("Allwinner EMAC MDIO interface driver"); MODULE_AUTHOR("Maxime Ripard "); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/mdio-thunder.c b/drivers/net/phy/mdio-thunder.c index 1546f6398831..b6128ae7f14f 100644 --- a/drivers/net/phy/mdio-thunder.c +++ b/drivers/net/phy/mdio-thunder.c @@ -1,8 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * * Copyright (C) 2009-2016 Cavium, Inc. */ @@ -151,4 +148,4 @@ static struct pci_driver thunder_mdiobus_driver = { module_pci_driver(thunder_mdiobus_driver); MODULE_DESCRIPTION("Cavium ThunderX MDIO bus driver"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c index 07c6048200c6..717cc2a056e8 100644 --- a/drivers/net/phy/mdio-xgene.c +++ b/drivers/net/phy/mdio-xgene.c @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* Applied Micro X-Gene SoC MDIO Driver * * Copyright (c) 2016, Applied Micro Circuits Corporation * Author: Iyappan Subramanian - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . */ #include diff --git a/drivers/net/phy/mdio-xgene.h b/drivers/net/phy/mdio-xgene.h index 3c85f3e30baa..b1f5ccb4ad9c 100644 --- a/drivers/net/phy/mdio-xgene.h +++ b/drivers/net/phy/mdio-xgene.h @@ -1,20 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* Applied Micro X-Gene SoC MDIO Driver * * Copyright (c) 2016, Applied Micro Circuits Corporation * Author: Iyappan Subramanian - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . */ #ifndef __MDIO_XGENE_H__ diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 741f27228088..3d313585c9c1 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -1,14 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ /* MDIO Bus interface * * Author: Andy Fleming * * Copyright (c) 2004 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -388,6 +383,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) if (IS_ERR(gpiod)) { dev_err(&bus->dev, "mii_bus %s couldn't get reset GPIO\n", bus->id); + device_del(&bus->dev); return PTR_ERR(gpiod); } else if (gpiod) { bus->reset_gpiod = gpiod; diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index c924700cf37b..887076292e50 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -1,12 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0+ /* Framework for MDIO devices, other than PHYs. * * Copyright (c) 2016 Andrew Lunn - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c index b03bcf2c388a..a238388eb1a5 100644 --- a/drivers/net/phy/meson-gxl.c +++ b/drivers/net/phy/meson-gxl.c @@ -1,20 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Amlogic Meson GXL Internal PHY Driver * * Copyright (C) 2015 Amlogic, Inc. All rights reserved. * Copyright (C) 2016 BayLibre, SAS. All rights reserved. * Author: Neil Armstrong - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * */ #include #include @@ -233,6 +223,7 @@ static struct phy_driver meson_gxl_phy[] = { .name = "Meson GXL Internal PHY", .features = PHY_BASIC_FEATURES, .flags = PHY_IS_INTERNAL, + .soft_reset = genphy_soft_reset, .config_init = meson_gxl_config_init, .aneg_done = genphy_aneg_done, .read_status = meson_gxl_read_status, diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 670698b7ef0e..38d5bd877346 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/micrel.c * @@ -8,11 +9,6 @@ * Copyright (c) 2010-2013 Micrel, Inc. * Copyright (c) 2014 Johan Hovold * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * * Support : Micrel Phys: * Giga phys: ksz9021, ksz9031, ksz9131 * 100/10 Phys : ksz8001, ksz8721, ksz8737, ksz8041 @@ -1049,6 +1045,7 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz9021_type, .probe = kszphy_probe, .config_init = ksz9031_config_init, + .soft_reset = genphy_soft_reset, .read_status = ksz9031_read_status, .ack_interrupt = kszphy_ack_interrupt, .config_intr = kszphy_config_intr, @@ -1077,6 +1074,7 @@ static struct phy_driver ksphy_driver[] = { .phy_id = PHY_ID_KSZ8873MLL, .phy_id_mask = MICREL_PHY_ID_MASK, .name = "Micrel KSZ8873MLL Switch", + .features = PHY_BASIC_FEATURES, .config_init = kszphy_config_init, .config_aneg = ksz8873mll_config_aneg, .read_status = ksz8873mll_read_status, diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 7557bebd5d7f..c6cbb3aa8ae0 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -1,18 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2015 Microchip Technology - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . */ #include #include diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index 3949fe299b18..db50efb30df5 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) /* * Driver for Microsemi VSC85xx PHYs * diff --git a/drivers/net/phy/national.c b/drivers/net/phy/national.c index 139bed2c8ab4..42282a86b680 100644 --- a/drivers/net/phy/national.c +++ b/drivers/net/phy/national.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/national.c * @@ -7,12 +8,6 @@ * Maintainer: Giuseppe Cavallaro * * Copyright (c) 2008 STMicroelectronics Limited - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 20fbd5eb56fd..909b3344babf 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -1,10 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Core PHY library, taken from phy.c - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. */ #include #include diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index f7a92e7edff7..d12aa512b7f5 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* Framework for configuring and reading PHY devices * Based on code in sungem_phy.c and gianfar_phy.c * @@ -5,12 +6,6 @@ * * Copyright (c) 2004 Freescale Semiconductor, Inc. * Copyright (c) 2006, 2007 Maciej W. Rozycki - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include @@ -538,13 +533,6 @@ int phy_start_aneg(struct phy_device *phydev) mutex_lock(&phydev->lock); - if (!__phy_is_started(phydev)) { - WARN(1, "called from state %s\n", - phy_state_to_str(phydev->state)); - err = -EBUSY; - goto out_unlock; - } - if (AUTONEG_DISABLE == phydev->autoneg) phy_sanitize_settings(phydev); @@ -555,11 +543,13 @@ int phy_start_aneg(struct phy_device *phydev) if (err < 0) goto out_unlock; - if (phydev->autoneg == AUTONEG_ENABLE) { - err = phy_check_link_status(phydev); - } else { - phydev->state = PHY_FORCING; - phydev->link_timeout = PHY_FORCE_TIMEOUT; + if (__phy_is_started(phydev)) { + if (phydev->autoneg == AUTONEG_ENABLE) { + err = phy_check_link_status(phydev); + } else { + phydev->state = PHY_FORCING; + phydev->link_timeout = PHY_FORCE_TIMEOUT; + } } out_unlock: @@ -795,28 +785,27 @@ static int phy_enable_interrupts(struct phy_device *phydev) } /** - * phy_start_interrupts - request and enable interrupts for a PHY device + * phy_request_interrupt - request interrupt for a PHY device * @phydev: target phy_device struct * * Description: Request the interrupt for the given PHY. * If this fails, then we set irq to PHY_POLL. - * Otherwise, we enable the interrupts in the PHY. * This should only be called with a valid IRQ number. - * Returns 0 on success or < 0 on error. */ -int phy_start_interrupts(struct phy_device *phydev) +void phy_request_interrupt(struct phy_device *phydev) { - if (request_threaded_irq(phydev->irq, NULL, phy_interrupt, - IRQF_ONESHOT | IRQF_SHARED, - phydev_name(phydev), phydev) < 0) { - phydev_warn(phydev, "Can't get IRQ %d\n", phydev->irq); - phydev->irq = PHY_POLL; - return 0; - } + int err; - return phy_enable_interrupts(phydev); + err = request_threaded_irq(phydev->irq, NULL, phy_interrupt, + IRQF_ONESHOT | IRQF_SHARED, + phydev_name(phydev), phydev); + if (err) { + phydev_warn(phydev, "Error %d requesting IRQ %d, falling back to polling\n", + err, phydev->irq); + phydev->irq = PHY_POLL; + } } -EXPORT_SYMBOL(phy_start_interrupts); +EXPORT_SYMBOL(phy_request_interrupt); /** * phy_stop - Bring down the PHY link, and stop checking the status @@ -862,33 +851,34 @@ EXPORT_SYMBOL(phy_stop); */ void phy_start(struct phy_device *phydev) { - int err = 0; + int err; mutex_lock(&phydev->lock); - switch (phydev->state) { - case PHY_READY: - phydev->state = PHY_UP; - break; - case PHY_HALTED: - /* if phy was suspended, bring the physical link up again */ - __phy_resume(phydev); - - /* make sure interrupts are re-enabled for the PHY */ - if (phy_interrupt_is_valid(phydev)) { - err = phy_enable_interrupts(phydev); - if (err < 0) - break; - } - - phydev->state = PHY_RESUMING; - break; - default: - break; + if (phydev->state != PHY_READY && phydev->state != PHY_HALTED) { + WARN(1, "called from state %s\n", + phy_state_to_str(phydev->state)); + goto out; } - mutex_unlock(&phydev->lock); - phy_trigger_machine(phydev); + /* if phy was suspended, bring the physical link up again */ + __phy_resume(phydev); + + /* make sure interrupts are enabled for the PHY */ + if (phy_interrupt_is_valid(phydev)) { + err = phy_enable_interrupts(phydev); + if (err < 0) + goto out; + } + + if (phydev->state == PHY_READY) + phydev->state = PHY_UP; + else + phydev->state = PHY_RESUMING; + + phy_start_machine(phydev); +out: + mutex_unlock(&phydev->lock); } EXPORT_SYMBOL(phy_start); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 7b3164174251..891e0178b97f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1,15 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* Framework for finding and configuring PHYs. * Also contains generic PHY driver * * Author: Andy Fleming * * Copyright (c) 2004 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -59,6 +54,9 @@ EXPORT_SYMBOL_GPL(phy_gbit_all_ports_features); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_10gbit_features); +__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; +EXPORT_SYMBOL_GPL(phy_10gbit_fec_features); + static const int phy_basic_ports_array[] = { ETHTOOL_LINK_MODE_Autoneg_BIT, ETHTOOL_LINK_MODE_TP_BIT, @@ -107,6 +105,11 @@ const int phy_10gbit_features_array[1] = { }; EXPORT_SYMBOL_GPL(phy_10gbit_features_array); +const int phy_10gbit_fec_features_array[1] = { + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, +}; +EXPORT_SYMBOL_GPL(phy_10gbit_fec_features_array); + __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_10gbit_full_features); @@ -189,6 +192,10 @@ static void features_init(void) linkmode_set_bit_array(phy_10gbit_full_features_array, ARRAY_SIZE(phy_10gbit_full_features_array), phy_10gbit_full_features); + /* 10G FEC only */ + linkmode_set_bit_array(phy_10gbit_fec_features_array, + ARRAY_SIZE(phy_10gbit_fec_features_array), + phy_10gbit_fec_features); } void phy_device_free(struct phy_device *phydev) @@ -552,10 +559,12 @@ static int phy_request_driver_module(struct phy_device *dev, int phy_id) ret = request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, MDIO_ID_ARGS(phy_id)); - /* we only check for failures in executing the usermode binary, - * not whether a PHY driver module exists for the PHY ID + /* We only check for failures in executing the usermode binary, + * not whether a PHY driver module exists for the PHY ID. + * Accept -ENOENT because this may occur in case no initramfs exists, + * then modprobe isn't available. */ - if (IS_ENABLED(CONFIG_MODULES) && ret < 0) { + if (IS_ENABLED(CONFIG_MODULES) && ret < 0 && ret != -ENOENT) { phydev_err(dev, "error %d loading PHY driver module for ID 0x%08x\n", ret, phy_id); return ret; @@ -942,9 +951,8 @@ int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, return rc; phy_prepare_link(phydev, handler); - phy_start_machine(phydev); - if (phydev->irq > 0) - phy_start_interrupts(phydev); + if (phy_interrupt_is_valid(phydev)) + phy_request_interrupt(phydev); return 0; } @@ -1058,7 +1066,7 @@ int phy_init_hw(struct phy_device *phydev) /* Deassert the reset signal */ phy_device_reset(phydev, 0); - if (!phydev->drv || !phydev->drv->config_init) + if (!phydev->drv) return 0; if (phydev->drv->soft_reset) @@ -1071,7 +1079,10 @@ int phy_init_hw(struct phy_device *phydev) if (ret < 0) return ret; - return phydev->drv->config_init(phydev); + if (phydev->drv->config_init) + ret = phydev->drv->config_init(phydev); + + return ret; } EXPORT_SYMBOL(phy_init_hw); @@ -2290,6 +2301,11 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner) { int retval; + if (WARN_ON(!new_driver->features)) { + pr_err("%s: Driver features are missing\n", new_driver->name); + return -EINVAL; + } + new_driver->mdiodrv.flags |= MDIO_DEVICE_IS_PHY; new_driver->mdiodrv.driver.name = new_driver->name; new_driver->mdiodrv.driver.bus = &mdio_bus_type; diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c index 263385b75bba..b86a4b2116f8 100644 --- a/drivers/net/phy/phy_led_triggers.c +++ b/drivers/net/phy/phy_led_triggers.c @@ -1,15 +1,5 @@ -/* Copyright (C) 2016 National Instruments Corp. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2016 National Instruments Corp. */ #include #include #include diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index e7becc7379d7..2e21ce42e388 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * phylink models the MAC to optional PHY connection, supporting * technologies such as SFP cages where the PHY is hot-pluggable. * * Copyright (C) 2015 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include @@ -679,9 +676,8 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) __ETHTOOL_LINK_MODE_MASK_NBITS, pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS, phy->advertising); - phy_start_machine(phy); - if (phy->irq > 0) - phy_start_interrupts(phy); + if (phy_interrupt_is_valid(phy)) + phy_request_interrupt(phy); return 0; } @@ -1697,4 +1693,4 @@ void phylink_helper_basex_speed(struct phylink_link_state *state) } EXPORT_SYMBOL_GPL(phylink_helper_basex_speed); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/phy/qsemi.c b/drivers/net/phy/qsemi.c index cfe2313dbefd..5486f6fb2ab2 100644 --- a/drivers/net/phy/qsemi.c +++ b/drivers/net/phy/qsemi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/qsemi.c * @@ -6,12 +7,6 @@ * Author: Andy Fleming * * Copyright (c) 2004 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include #include diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index c6010fb1aa0f..75543f9370f3 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/realtek.c * @@ -6,12 +7,6 @@ * Author: Johnson Leung * * Copyright (c) 2004 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include #include diff --git a/drivers/net/phy/rockchip.c b/drivers/net/phy/rockchip.c index f1da70b9b55f..95abf7072f32 100644 --- a/drivers/net/phy/rockchip.c +++ b/drivers/net/phy/rockchip.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /** * drivers/net/phy/rockchip.c * @@ -6,12 +7,6 @@ * Copyright (c) 2017, Fuzhou Rockchip Electronics Co., Ltd * * David Wu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * */ #include @@ -229,4 +224,4 @@ MODULE_DEVICE_TABLE(mdio, rockchip_phy_tbl); MODULE_AUTHOR("David Wu "); MODULE_DESCRIPTION("Rockchip Ethernet PHY driver"); -MODULE_LICENSE("GPL v2"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index fd8bb998ae52..298ab7546929 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include #include #include diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index f9477ff55545..c94d3bfbc772 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/smsc.c * @@ -7,11 +8,6 @@ * * Copyright (c) 2006 Herbert Valerio Riedel * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * * Support added for SMSC LAN8187 and LAN8700 by steve.glendinning@shawell.net * */ diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index f17b3441779b..92b64e254b44 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * SPI driver for Micrel/Kendin KS8995M and KSZ8864RMN ethernet switches * @@ -5,10 +6,6 @@ * * This file was based on: drivers/spi/at25.c * Copyright (C) 2006 David Brownell - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/net/phy/ste10Xp.c b/drivers/net/phy/ste10Xp.c index 33d733684f5b..5b6acf431f98 100644 --- a/drivers/net/phy/ste10Xp.c +++ b/drivers/net/phy/ste10Xp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/ste10Xp.c * @@ -6,12 +7,6 @@ * Author: Giuseppe Cavallaro * * Copyright (c) 2008 STMicroelectronics Limited - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include diff --git a/drivers/net/phy/swphy.c b/drivers/net/phy/swphy.c index 34f58f2349e9..b9743569e431 100644 --- a/drivers/net/phy/swphy.c +++ b/drivers/net/phy/swphy.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Software PHY emulation * @@ -7,11 +8,6 @@ * Anton Vorontsov * * Copyright (c) 2006-2007 MontaVista Software, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. */ #include #include diff --git a/drivers/net/phy/teranetics.c b/drivers/net/phy/teranetics.c index 22f3bdd8206c..145c328b00fa 100644 --- a/drivers/net/phy/teranetics.c +++ b/drivers/net/phy/teranetics.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Driver for Teranetics PHY * * Author: Shaohui Xie * * Copyright 2015 Freescale Semiconductor, Inc. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. */ #include @@ -80,6 +77,7 @@ static struct phy_driver teranetics_driver[] = { .phy_id = PHY_ID_TN2020, .phy_id_mask = 0xffffffff, .name = "Teranetics TN2020", + .features = PHY_10GBIT_FEATURES, .soft_reset = gen10g_no_soft_reset, .aneg_done = teranetics_aneg_done, .config_init = gen10g_config_init, diff --git a/drivers/net/phy/uPD60620.c b/drivers/net/phy/uPD60620.c index 1e4fc42e4629..219fc7cdc2b3 100644 --- a/drivers/net/phy/uPD60620.c +++ b/drivers/net/phy/uPD60620.c @@ -1,13 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Driver for the Renesas PHY uPD60620. * * Copyright (C) 2015 Softing Industrial Automation GmbH - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * */ #include diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 0646af458f6a..dc0dd87a6694 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -1,15 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Driver for Vitesse PHYs * * Author: Kriston Carson - * - * Copyright (c) 2005, 2009, 2011 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index 74a8782313cf..ebf419dc7307 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* Xilinx GMII2RGMII Converter driver * * Copyright (C) 2016 Xilinx, Inc. @@ -8,16 +9,6 @@ * * Description: * This driver is developed for Xilinx GMII2RGMII Converter - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include #include diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 62dc564b251d..f22639f0116a 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -445,6 +445,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, if (pskb_trim_rcsum(skb, len)) goto drop; + ph = pppoe_hdr(skb); pn = pppoe_pernet(dev_net(dev)); /* Note that get_item does a sock_hold(), so sk_pppox(po) diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index 57f1c94fca0b..820a2fe7d027 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -1287,6 +1287,20 @@ static const struct driver_info asix112_info = { #undef ASIX112_DESC +static const struct driver_info trendnet_info = { + .description = "USB-C 3.1 to 5GBASE-T Ethernet Adapter", + .bind = aqc111_bind, + .unbind = aqc111_unbind, + .status = aqc111_status, + .link_reset = aqc111_link_reset, + .reset = aqc111_reset, + .stop = aqc111_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | + FLAG_AVOID_UNLINK_URBS | FLAG_MULTI_PACKET, + .rx_fixup = aqc111_rx_fixup, + .tx_fixup = aqc111_tx_fixup, +}; + static int aqc111_suspend(struct usb_interface *intf, pm_message_t message) { struct usbnet *dev = usb_get_intfdata(intf); @@ -1440,6 +1454,7 @@ static const struct usb_device_id products[] = { {AQC111_USB_ETH_DEV(0x2eca, 0xc101, aqc111_info)}, {AQC111_USB_ETH_DEV(0x0b95, 0x2790, asix111_info)}, {AQC111_USB_ETH_DEV(0x0b95, 0x2791, asix112_info)}, + {AQC111_USB_ETH_DEV(0x20f4, 0xe05a, trendnet_info)}, { },/* END */ }; MODULE_DEVICE_TABLE(usb, products); diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index b654f05b2ccd..3d93993e74da 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -739,8 +739,13 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); chipcode &= AX_CHIPCODE_MASK; - (chipcode == AX_AX88772_CHIPCODE) ? ax88772_hw_reset(dev, 0) : - ax88772a_hw_reset(dev, 0); + ret = (chipcode == AX_AX88772_CHIPCODE) ? ax88772_hw_reset(dev, 0) : + ax88772a_hw_reset(dev, 0); + + if (ret < 0) { + netdev_dbg(dev->net, "Failed to reset AX88772: %d\n", ret); + return ret; + } /* Read PHYID register *AFTER* the PHY was reset properly */ phyid = asix_get_phyid(dev); diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 3305f23793c7..5512a1038721 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -843,6 +843,14 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* USB-C 3.1 to 5GBASE-T Ethernet Adapter (based on AQC111U) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0xe05a, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 80373a9171dd..3f145e4c6c08 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -388,7 +388,6 @@ static void read_bulk_callback(struct urb *urb) unsigned pkt_len, res; struct sk_buff *skb; struct net_device *netdev; - u16 rx_stat; int status = urb->status; int result; unsigned long flags; @@ -424,7 +423,6 @@ static void read_bulk_callback(struct urb *urb) goto goon; res = urb->actual_length; - rx_stat = le16_to_cpu(*(__le16 *)(urb->transfer_buffer + res - 4)); pkt_len = res - 4; skb_put(dev->rx_skb, pkt_len); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 023725086046..2a0edd4653e3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1035,6 +1035,7 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, goto frame_err; } + skb_record_rx_queue(skb, vq2rxq(rq->vq)); skb->protocol = eth_type_trans(skb, dev); pr_debug("Receiving skb proto 0x%04x len %i type %i\n", ntohs(skb->protocol), skb->len, skb->pkt_type); @@ -1330,7 +1331,7 @@ static int virtnet_receive(struct receive_queue *rq, int budget, return stats.packets; } -static void free_old_xmit_skbs(struct send_queue *sq) +static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) { struct sk_buff *skb; unsigned int len; @@ -1343,7 +1344,7 @@ static void free_old_xmit_skbs(struct send_queue *sq) bytes += skb->len; packets++; - dev_consume_skb_any(skb); + napi_consume_skb(skb, in_napi); } /* Avoid overhead when no packets have been processed @@ -1369,7 +1370,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq) return; if (__netif_tx_trylock(txq)) { - free_old_xmit_skbs(sq); + free_old_xmit_skbs(sq, true); __netif_tx_unlock(txq); } @@ -1445,7 +1446,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq)); __netif_tx_lock(txq, raw_smp_processor_id()); - free_old_xmit_skbs(sq); + free_old_xmit_skbs(sq, true); __netif_tx_unlock(txq); virtqueue_napi_complete(napi, sq->vq, 0); @@ -1514,7 +1515,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) bool use_napi = sq->napi.weight; /* Free up any pending old buffers before queueing new ones. */ - free_old_xmit_skbs(sq); + free_old_xmit_skbs(sq, false); if (use_napi && kick) virtqueue_enable_cb_delayed(sq->vq); @@ -1557,7 +1558,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) if (!use_napi && unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { /* More just got used, free them then recheck. */ - free_old_xmit_skbs(sq); + free_old_xmit_skbs(sq, false); if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) { netif_start_subqueue(dev, qnum); virtqueue_disable_cb(sq->vq); diff --git a/drivers/net/wan/lmc/Makefile b/drivers/net/wan/lmc/Makefile index 609710d64eb5..247f60c401ef 100644 --- a/drivers/net/wan/lmc/Makefile +++ b/drivers/net/wan/lmc/Makefile @@ -14,4 +14,4 @@ lmc-objs := lmc_debug.o lmc_media.o lmc_main.o lmc_proto.o # -DDEBUG \ # -DLMC_PACKET_LOG -ccflags-y := -I. $(DBGDEF) +ccflags-y := $(DBGDEF) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 3a4b8786f7ea..320edcac4699 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2761,6 +2761,11 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, BIT(NL80211_CHAN_WIDTH_160); } + if (!n_limits) { + err = -EINVAL; + goto failed_hw; + } + data->if_combination.n_limits = n_limits; data->if_combination.max_interfaces = 2048; data->if_combination.limits = data->if_limits; diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index 9479b26b2d02..71044c6cfd8c 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -523,8 +523,10 @@ static int virt_wifi_newlink(struct net *src_net, struct net_device *dev, SET_NETDEV_DEV(dev, &priv->lowerdev->dev); dev->ieee80211_ptr = kzalloc(sizeof(*dev->ieee80211_ptr), GFP_KERNEL); - if (!dev->ieee80211_ptr) + if (!dev->ieee80211_ptr) { + err = -ENOMEM; goto remove_handler; + } dev->ieee80211_ptr->iftype = NL80211_IFTYPE_STATION; dev->ieee80211_ptr->wiphy = common_wiphy; diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index 0cf58cabc9ed..3cf50274fadb 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -26,6 +26,12 @@ static int nvdimm_probe(struct device *dev) struct nvdimm_drvdata *ndd; int rc; + rc = nvdimm_security_setup_events(dev); + if (rc < 0) { + dev_err(dev, "security event setup failed: %d\n", rc); + return rc; + } + rc = nvdimm_check_config_data(dev); if (rc) { /* not required for non-aliased nvdimm, ex. NVDIMM-N */ diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 4890310df874..efe412a6b5b9 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -578,13 +578,25 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, } EXPORT_SYMBOL_GPL(__nvdimm_create); -int nvdimm_security_setup_events(struct nvdimm *nvdimm) +static void shutdown_security_notify(void *data) { - nvdimm->sec.overwrite_state = sysfs_get_dirent(nvdimm->dev.kobj.sd, - "security"); + struct nvdimm *nvdimm = data; + + sysfs_put(nvdimm->sec.overwrite_state); +} + +int nvdimm_security_setup_events(struct device *dev) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + + if (nvdimm->sec.state < 0 || !nvdimm->sec.ops + || !nvdimm->sec.ops->overwrite) + return 0; + nvdimm->sec.overwrite_state = sysfs_get_dirent(dev->kobj.sd, "security"); if (!nvdimm->sec.overwrite_state) - return -ENODEV; - return 0; + return -ENOMEM; + + return devm_add_action_or_reset(dev, shutdown_security_notify, nvdimm); } EXPORT_SYMBOL_GPL(nvdimm_security_setup_events); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 2b2cf4e554d3..e5ffd5733540 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -54,12 +54,12 @@ struct nvdimm { }; static inline enum nvdimm_security_state nvdimm_security_state( - struct nvdimm *nvdimm, bool master) + struct nvdimm *nvdimm, enum nvdimm_passphrase_type ptype) { if (!nvdimm->sec.ops) return -ENXIO; - return nvdimm->sec.ops->state(nvdimm, master); + return nvdimm->sec.ops->state(nvdimm, ptype); } int nvdimm_security_freeze(struct nvdimm *nvdimm); #if IS_ENABLED(CONFIG_NVDIMM_KEYS) diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index cfde992684e7..379bf4305e61 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -250,6 +250,7 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, void nvdimm_set_aliasing(struct device *dev); void nvdimm_set_locked(struct device *dev); void nvdimm_clear_locked(struct device *dev); +int nvdimm_security_setup_events(struct device *dev); #if IS_ENABLED(CONFIG_NVDIMM_KEYS) int nvdimm_security_unlock(struct device *dev); #else diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index df4b3a6db51b..b9fff3b8ed1b 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -545,8 +545,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0); ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) + ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc); - if (!(ctrl->anacap & (1 << 6))) - ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32); + ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32); if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) { dev_err(ctrl->device, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index deb1a66bf117..9bc585415d9b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2041,14 +2041,18 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) return ret; } +/* irq_queues covers admin queue */ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues) { unsigned int this_w_queues = write_queues; + WARN_ON(!irq_queues); + /* - * Setup read/write queue split + * Setup read/write queue split, assign admin queue one independent + * irq vector if irq_queues is > 1. */ - if (irq_queues == 1) { + if (irq_queues <= 2) { dev->io_queues[HCTX_TYPE_DEFAULT] = 1; dev->io_queues[HCTX_TYPE_READ] = 0; return; @@ -2056,21 +2060,21 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues) /* * If 'write_queues' is set, ensure it leaves room for at least - * one read queue + * one read queue and one admin queue */ if (this_w_queues >= irq_queues) - this_w_queues = irq_queues - 1; + this_w_queues = irq_queues - 2; /* * If 'write_queues' is set to zero, reads and writes will share * a queue set. */ if (!this_w_queues) { - dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues; + dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues - 1; dev->io_queues[HCTX_TYPE_READ] = 0; } else { dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues; - dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues; + dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues - 1; } } @@ -2095,7 +2099,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) this_p_queues = nr_io_queues - 1; irq_queues = 1; } else { - irq_queues = nr_io_queues - this_p_queues; + irq_queues = nr_io_queues - this_p_queues + 1; } dev->io_queues[HCTX_TYPE_POLL] = this_p_queues; @@ -2115,8 +2119,9 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) * If we got a failure and we're down to asking for just * 1 + 1 queues, just ask for a single vector. We'll share * that between the single IO queue and the admin queue. + * Otherwise, we assign one independent vector to admin queue. */ - if (result >= 0 && irq_queues > 1) + if (irq_queues > 1) irq_queues = irq_sets[0] + irq_sets[1] + 1; result = pci_alloc_irq_vectors_affinity(pdev, irq_queues, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 0a2fd2949ad7..52abc3a6de12 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -119,6 +119,7 @@ struct nvme_rdma_ctrl { struct nvme_ctrl ctrl; bool use_inline_data; + u32 io_queues[HCTX_MAX_TYPES]; }; static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl) @@ -165,8 +166,8 @@ static inline int nvme_rdma_queue_idx(struct nvme_rdma_queue *queue) static bool nvme_rdma_poll_queue(struct nvme_rdma_queue *queue) { return nvme_rdma_queue_idx(queue) > - queue->ctrl->ctrl.opts->nr_io_queues + - queue->ctrl->ctrl.opts->nr_write_queues; + queue->ctrl->io_queues[HCTX_TYPE_DEFAULT] + + queue->ctrl->io_queues[HCTX_TYPE_READ]; } static inline size_t nvme_rdma_inline_data_size(struct nvme_rdma_queue *queue) @@ -661,8 +662,21 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) nr_io_queues = min_t(unsigned int, nr_io_queues, ibdev->num_comp_vectors); - nr_io_queues += min(opts->nr_write_queues, num_online_cpus()); - nr_io_queues += min(opts->nr_poll_queues, num_online_cpus()); + if (opts->nr_write_queues) { + ctrl->io_queues[HCTX_TYPE_DEFAULT] = + min(opts->nr_write_queues, nr_io_queues); + nr_io_queues += ctrl->io_queues[HCTX_TYPE_DEFAULT]; + } else { + ctrl->io_queues[HCTX_TYPE_DEFAULT] = nr_io_queues; + } + + ctrl->io_queues[HCTX_TYPE_READ] = nr_io_queues; + + if (opts->nr_poll_queues) { + ctrl->io_queues[HCTX_TYPE_POLL] = + min(opts->nr_poll_queues, num_online_cpus()); + nr_io_queues += ctrl->io_queues[HCTX_TYPE_POLL]; + } ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) @@ -1689,18 +1703,28 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq, bool reserved) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); + struct nvme_rdma_queue *queue = req->queue; + struct nvme_rdma_ctrl *ctrl = queue->ctrl; - dev_warn(req->queue->ctrl->ctrl.device, - "I/O %d QID %d timeout, reset controller\n", - rq->tag, nvme_rdma_queue_idx(req->queue)); + dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n", + rq->tag, nvme_rdma_queue_idx(queue)); - /* queue error recovery */ - nvme_rdma_error_recovery(req->queue->ctrl); + if (ctrl->ctrl.state != NVME_CTRL_LIVE) { + /* + * Teardown immediately if controller times out while starting + * or we are already started error recovery. all outstanding + * requests are completed on shutdown, so we return BLK_EH_DONE. + */ + flush_work(&ctrl->err_work); + nvme_rdma_teardown_io_queues(ctrl, false); + nvme_rdma_teardown_admin_queue(ctrl, false); + return BLK_EH_DONE; + } - /* fail with DNR on cmd timeout */ - nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; + dev_warn(ctrl->ctrl.device, "starting error recovery\n"); + nvme_rdma_error_recovery(ctrl); - return BLK_EH_DONE; + return BLK_EH_RESET_TIMER; } static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, @@ -1779,17 +1803,15 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) struct nvme_rdma_ctrl *ctrl = set->driver_data; set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; - set->map[HCTX_TYPE_READ].nr_queues = ctrl->ctrl.opts->nr_io_queues; + set->map[HCTX_TYPE_DEFAULT].nr_queues = + ctrl->io_queues[HCTX_TYPE_DEFAULT]; + set->map[HCTX_TYPE_READ].nr_queues = ctrl->io_queues[HCTX_TYPE_READ]; if (ctrl->ctrl.opts->nr_write_queues) { /* separate read/write queues */ - set->map[HCTX_TYPE_DEFAULT].nr_queues = - ctrl->ctrl.opts->nr_write_queues; set->map[HCTX_TYPE_READ].queue_offset = - ctrl->ctrl.opts->nr_write_queues; + ctrl->io_queues[HCTX_TYPE_DEFAULT]; } else { /* mixed read/write queues */ - set->map[HCTX_TYPE_DEFAULT].nr_queues = - ctrl->ctrl.opts->nr_io_queues; set->map[HCTX_TYPE_READ].queue_offset = 0; } blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT], @@ -1799,12 +1821,12 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) if (ctrl->ctrl.opts->nr_poll_queues) { set->map[HCTX_TYPE_POLL].nr_queues = - ctrl->ctrl.opts->nr_poll_queues; + ctrl->io_queues[HCTX_TYPE_POLL]; set->map[HCTX_TYPE_POLL].queue_offset = - ctrl->ctrl.opts->nr_io_queues; + ctrl->io_queues[HCTX_TYPE_DEFAULT]; if (ctrl->ctrl.opts->nr_write_queues) set->map[HCTX_TYPE_POLL].queue_offset += - ctrl->ctrl.opts->nr_write_queues; + ctrl->io_queues[HCTX_TYPE_READ]; blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]); } return 0; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 265a0543b381..5f0a00425242 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1948,20 +1948,23 @@ nvme_tcp_timeout(struct request *rq, bool reserved) struct nvme_tcp_ctrl *ctrl = req->queue->ctrl; struct nvme_tcp_cmd_pdu *pdu = req->pdu; - dev_dbg(ctrl->ctrl.device, + dev_warn(ctrl->ctrl.device, "queue %d: timeout request %#x type %d\n", - nvme_tcp_queue_id(req->queue), rq->tag, - pdu->hdr.type); + nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type); if (ctrl->ctrl.state != NVME_CTRL_LIVE) { - union nvme_result res = {}; - - nvme_req(rq)->flags |= NVME_REQ_CANCELLED; - nvme_end_request(rq, cpu_to_le16(NVME_SC_ABORT_REQ), res); + /* + * Teardown immediately if controller times out while starting + * or we are already started error recovery. all outstanding + * requests are completed on shutdown, so we return BLK_EH_DONE. + */ + flush_work(&ctrl->err_work); + nvme_tcp_teardown_io_queues(&ctrl->ctrl, false); + nvme_tcp_teardown_admin_queue(&ctrl->ctrl, false); return BLK_EH_DONE; } - /* queue error recovery */ + dev_warn(ctrl->ctrl.device, "starting error recovery\n"); nvme_tcp_error_recovery(&ctrl->ctrl); return BLK_EH_RESET_TIMER; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index a8d23eb80192..a884e3a0e8af 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -139,6 +139,10 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_qp_event(struct ib_event *event, void *priv); static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); +static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, + struct nvmet_rdma_rsp *r); +static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, + struct nvmet_rdma_rsp *r); static const struct nvmet_fabrics_ops nvmet_rdma_ops; @@ -182,9 +186,17 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue) spin_unlock_irqrestore(&queue->rsps_lock, flags); if (unlikely(!rsp)) { - rsp = kmalloc(sizeof(*rsp), GFP_KERNEL); + int ret; + + rsp = kzalloc(sizeof(*rsp), GFP_KERNEL); if (unlikely(!rsp)) return NULL; + ret = nvmet_rdma_alloc_rsp(queue->dev, rsp); + if (unlikely(ret)) { + kfree(rsp); + return NULL; + } + rsp->allocated = true; } @@ -197,6 +209,7 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) unsigned long flags; if (unlikely(rsp->allocated)) { + nvmet_rdma_free_rsp(rsp->queue->dev, rsp); kfree(rsp); return; } diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 44b37b202e39..ad0df786fe93 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1089,7 +1089,7 @@ out: static int nvmet_tcp_try_recv_one(struct nvmet_tcp_queue *queue) { - int result; + int result = 0; if (unlikely(queue->rcv_state == NVMET_TCP_RECV_ERR)) return 0; diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index a09c1c3cf831..49b16f76d78e 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -207,11 +207,8 @@ static void __of_attach_node(struct device_node *np) if (!of_node_check_flag(np, OF_OVERLAY)) { np->name = __of_get_property(np, "name", NULL); - np->type = __of_get_property(np, "device_type", NULL); if (!np->name) np->name = ""; - if (!np->type) - np->type = ""; phandle = __of_get_property(np, "phandle", &sz); if (!phandle) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 7099c652c6a5..9cc1461aac7d 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -314,12 +314,8 @@ static bool populate_node(const void *blob, populate_properties(blob, offset, mem, np, pathp, dryrun); if (!dryrun) { np->name = of_get_property(np, "name", NULL); - np->type = of_get_property(np, "device_type", NULL); - if (!np->name) np->name = ""; - if (!np->type) - np->type = ""; } *pnp = np; diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 2b5ac43a5690..c423e94baf0f 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -423,12 +423,9 @@ static int add_changeset_node(struct overlay_changeset *ovcs, tchild->parent = target->np; tchild->name = __of_get_property(node, "name", NULL); - tchild->type = __of_get_property(node, "device_type", NULL); if (!tchild->name) tchild->name = ""; - if (!tchild->type) - tchild->type = ""; /* ignore obsolete "linux,phandle" */ phandle = __of_get_property(node, "phandle", &size); diff --git a/drivers/of/pdt.c b/drivers/of/pdt.c index d3185063d369..7eda43c66c91 100644 --- a/drivers/of/pdt.c +++ b/drivers/of/pdt.c @@ -155,7 +155,6 @@ static struct device_node * __init of_pdt_create_node(phandle node, dp->parent = parent; dp->name = of_pdt_get_one_property(node, "name"); - dp->type = of_pdt_get_one_property(node, "device_type"); dp->phandle = node; dp->properties = of_pdt_build_prop_list(node); diff --git a/drivers/of/property.c b/drivers/of/property.c index 08430031bd28..8631efa1daa1 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -806,6 +806,7 @@ struct device_node *of_graph_get_remote_node(const struct device_node *node, if (!of_device_is_available(remote)) { pr_debug("not available for remote node\n"); + of_node_put(remote); return NULL; } diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 4310c7a4212e..2ab92409210a 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -21,13 +21,14 @@ menuconfig PCI support for PCI-X and the foundations for PCI Express support. Say 'Y' here unless you know what you are doing. +if PCI + config PCI_DOMAINS bool depends on PCI config PCI_DOMAINS_GENERIC bool - depends on PCI select PCI_DOMAINS config PCI_SYSCALL @@ -37,7 +38,6 @@ source "drivers/pci/pcie/Kconfig" config PCI_MSI bool "Message Signaled Interrupts (MSI and MSI-X)" - depends on PCI select GENERIC_MSI_IRQ help This allows device drivers to enable MSI (Message Signaled @@ -59,7 +59,6 @@ config PCI_MSI_IRQ_DOMAIN config PCI_QUIRKS default y bool "Enable PCI quirk workarounds" if EXPERT - depends on PCI help This enables workarounds for various PCI chipset bugs/quirks. Disable this only if your target machine is unaffected by PCI @@ -67,7 +66,7 @@ config PCI_QUIRKS config PCI_DEBUG bool "PCI Debugging" - depends on PCI && DEBUG_KERNEL + depends on DEBUG_KERNEL help Say Y here if you want the PCI core to produce a bunch of debug messages to the system log. Select this if you are having a @@ -77,7 +76,6 @@ config PCI_DEBUG config PCI_REALLOC_ENABLE_AUTO bool "Enable PCI resource re-allocation detection" - depends on PCI depends on PCI_IOV help Say Y here if you want the PCI core to detect if PCI resource @@ -90,7 +88,6 @@ config PCI_REALLOC_ENABLE_AUTO config PCI_STUB tristate "PCI Stub driver" - depends on PCI help Say Y or M here if you want be able to reserve a PCI device when it is going to be assigned to a guest operating system. @@ -99,7 +96,6 @@ config PCI_STUB config PCI_PF_STUB tristate "PCI PF Stub driver" - depends on PCI depends on PCI_IOV help Say Y or M here if you want to enable support for devices that @@ -111,7 +107,7 @@ config PCI_PF_STUB config XEN_PCIDEV_FRONTEND tristate "Xen PCI Frontend" - depends on PCI && X86 && XEN + depends on X86 && XEN select PCI_XEN select XEN_XENBUS_FRONTEND default y @@ -133,7 +129,6 @@ config PCI_BRIDGE_EMUL config PCI_IOV bool "PCI IOV support" - depends on PCI select PCI_ATS help I/O Virtualization is a PCI feature supported by some devices @@ -144,7 +139,6 @@ config PCI_IOV config PCI_PRI bool "PCI PRI support" - depends on PCI select PCI_ATS help PRI is the PCI Page Request Interface. It allows PCI devices that are @@ -154,7 +148,6 @@ config PCI_PRI config PCI_PASID bool "PCI PASID support" - depends on PCI select PCI_ATS help Process Address Space Identifiers (PASIDs) can be used by PCI devices @@ -167,7 +160,7 @@ config PCI_PASID config PCI_P2PDMA bool "PCI peer-to-peer transfer support" - depends on PCI && ZONE_DEVICE + depends on ZONE_DEVICE select GENERIC_ALLOCATOR help EnableÑ• drivers to do PCI peer-to-peer transactions to and from @@ -184,12 +177,11 @@ config PCI_P2PDMA config PCI_LABEL def_bool y if (DMI || ACPI) - depends on PCI select NLS config PCI_HYPERV tristate "Hyper-V PCI Frontend" - depends on PCI && X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64 + depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64 help The PCI device frontend driver allows the kernel to import arbitrary PCI devices from a PCI backend to support PCI driver domains. @@ -198,3 +190,5 @@ source "drivers/pci/hotplug/Kconfig" source "drivers/pci/controller/Kconfig" source "drivers/pci/endpoint/Kconfig" source "drivers/pci/switch/Kconfig" + +endif diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 7a1c8a09efa5..4c0b47867258 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1168,7 +1168,8 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, const struct irq_affinity *affd) { static const struct irq_affinity msi_default_affd; - int vecs = -ENOSPC; + int msix_vecs = -ENOSPC; + int msi_vecs = -ENOSPC; if (flags & PCI_IRQ_AFFINITY) { if (!affd) @@ -1179,16 +1180,17 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, } if (flags & PCI_IRQ_MSIX) { - vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs, - affd); - if (vecs > 0) - return vecs; + msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs, + max_vecs, affd); + if (msix_vecs > 0) + return msix_vecs; } if (flags & PCI_IRQ_MSI) { - vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd); - if (vecs > 0) - return vecs; + msi_vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, + affd); + if (msi_vecs > 0) + return msi_vecs; } /* use legacy irq if allowed */ @@ -1199,7 +1201,9 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, } } - return vecs; + if (msix_vecs == -ENOSPC) + return -ENOSPC; + return msi_vecs; } EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c9d8e3c837de..c25acace7d91 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6195,7 +6195,8 @@ static int __init pci_setup(char *str) } else if (!strncmp(str, "pcie_scan_all", 13)) { pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS); } else if (!strncmp(str, "disable_acs_redir=", 18)) { - disable_acs_redir_param = str + 18; + disable_acs_redir_param = + kstrdup(str + 18, GFP_KERNEL); } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); diff --git a/drivers/phy/qualcomm/phy-ath79-usb.c b/drivers/phy/qualcomm/phy-ath79-usb.c index 6fd6e07ab345..09a77e556ece 100644 --- a/drivers/phy/qualcomm/phy-ath79-usb.c +++ b/drivers/phy/qualcomm/phy-ath79-usb.c @@ -31,7 +31,7 @@ static int ath79_usb_phy_power_on(struct phy *phy) err = reset_control_deassert(priv->reset); if (err && priv->no_suspend_override) - reset_control_assert(priv->no_suspend_override); + reset_control_deassert(priv->no_suspend_override); return err; } @@ -69,7 +69,7 @@ static int ath79_usb_phy_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; - priv->reset = devm_reset_control_get(&pdev->dev, "usb-phy"); + priv->reset = devm_reset_control_get(&pdev->dev, "phy"); if (IS_ERR(priv->reset)) return PTR_ERR(priv->reset); diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index 77fdaa551977..a52c5bb35033 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -204,11 +204,11 @@ static struct phy *phy_gmii_sel_of_xlate(struct device *dev, if (args->args_count < 1) return ERR_PTR(-EINVAL); + if (!priv || !priv->if_phys) + return ERR_PTR(-ENODEV); if (priv->soc_data->features & BIT(PHY_GMII_SEL_RMII_IO_CLK_EN) && args->args_count < 2) return ERR_PTR(-EINVAL); - if (!priv || !priv->if_phys) - return ERR_PTR(-ENODEV); if (phy_id > priv->soc_data->num_ports) return ERR_PTR(-EINVAL); if (phy_id != priv->if_phys[phy_id - 1].id) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index e3b62c2ee8d1..5e2109c54c7c 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1009,7 +1009,7 @@ config INTEL_MFLD_THERMAL config INTEL_IPS tristate "Intel Intelligent Power Sharing" - depends on ACPI + depends on ACPI && PCI ---help--- Intel Calpella platforms support dynamic power sharing between the CPU and GPU, maximizing performance in a given TDP. This driver, @@ -1135,7 +1135,7 @@ config SAMSUNG_Q10 config APPLE_GMUX tristate "Apple Gmux Driver" - depends on ACPI + depends on ACPI && PCI depends on PNP depends on BACKLIGHT_CLASS_DEVICE depends on BACKLIGHT_APPLE=n || BACKLIGHT_APPLE @@ -1174,7 +1174,7 @@ config INTEL_SMARTCONNECT config INTEL_PMC_IPC tristate "Intel PMC IPC Driver" - depends on ACPI + depends on ACPI && PCI ---help--- This driver provides support for PMC control on some Intel platforms. The PMC is an ARC processor which defines IPC commands for communication diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index d137c480db46..aeb4a8b2e0af 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -53,7 +53,7 @@ config PTP_1588_CLOCK_QORIQ packets using the SO_TIMESTAMPING API. To compile this driver as a module, choose M here: the module - will be called ptp_qoriq. + will be called ptp-qoriq. config PTP_1588_CLOCK_IXP46X tristate "Intel IXP46x as PTP clock" diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile index 19efa9cfa950..677d1d178a3e 100644 --- a/drivers/ptp/Makefile +++ b/drivers/ptp/Makefile @@ -9,4 +9,6 @@ obj-$(CONFIG_PTP_1588_CLOCK_DTE) += ptp_dte.o obj-$(CONFIG_PTP_1588_CLOCK_IXP46X) += ptp_ixp46x.o obj-$(CONFIG_PTP_1588_CLOCK_PCH) += ptp_pch.o obj-$(CONFIG_PTP_1588_CLOCK_KVM) += ptp_kvm.o -obj-$(CONFIG_PTP_1588_CLOCK_QORIQ) += ptp_qoriq.o +obj-$(CONFIG_PTP_1588_CLOCK_QORIQ) += ptp-qoriq.o +ptp-qoriq-y += ptp_qoriq.o +ptp-qoriq-$(CONFIG_DEBUG_FS) += ptp_qoriq_debugfs.o diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index fdd49c26bbcc..43416b2e8a13 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -88,6 +88,49 @@ static void set_fipers(struct qoriq_ptp *qoriq_ptp) qoriq_write(®s->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2); } +static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index, + bool update_event) +{ + struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + struct ptp_clock_event event; + void __iomem *reg_etts_l; + void __iomem *reg_etts_h; + u32 valid, stat, lo, hi; + + switch (index) { + case 0: + valid = ETS1_VLD; + reg_etts_l = ®s->etts_regs->tmr_etts1_l; + reg_etts_h = ®s->etts_regs->tmr_etts1_h; + break; + case 1: + valid = ETS2_VLD; + reg_etts_l = ®s->etts_regs->tmr_etts2_l; + reg_etts_h = ®s->etts_regs->tmr_etts2_h; + break; + default: + return -EINVAL; + } + + event.type = PTP_CLOCK_EXTTS; + event.index = index; + + do { + lo = qoriq_read(reg_etts_l); + hi = qoriq_read(reg_etts_h); + + if (update_event) { + event.timestamp = ((u64) hi) << 32; + event.timestamp |= lo; + ptp_clock_event(qoriq_ptp->clock, &event); + } + + stat = qoriq_read(®s->ctrl_regs->tmr_stat); + } while (qoriq_ptp->extts_fifo_support && (stat & valid)); + + return 0; +} + /* * Interrupt service routine */ @@ -98,33 +141,28 @@ static irqreturn_t isr(int irq, void *priv) struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; struct ptp_clock_event event; u64 ns; - u32 ack = 0, lo, hi, mask, val; + u32 ack = 0, lo, hi, mask, val, irqs; + + spin_lock(&qoriq_ptp->lock); val = qoriq_read(®s->ctrl_regs->tmr_tevent); + mask = qoriq_read(®s->ctrl_regs->tmr_temask); - if (val & ETS1) { + spin_unlock(&qoriq_ptp->lock); + + irqs = val & mask; + + if (irqs & ETS1) { ack |= ETS1; - hi = qoriq_read(®s->etts_regs->tmr_etts1_h); - lo = qoriq_read(®s->etts_regs->tmr_etts1_l); - event.type = PTP_CLOCK_EXTTS; - event.index = 0; - event.timestamp = ((u64) hi) << 32; - event.timestamp |= lo; - ptp_clock_event(qoriq_ptp->clock, &event); + extts_clean_up(qoriq_ptp, 0, true); } - if (val & ETS2) { + if (irqs & ETS2) { ack |= ETS2; - hi = qoriq_read(®s->etts_regs->tmr_etts2_h); - lo = qoriq_read(®s->etts_regs->tmr_etts2_l); - event.type = PTP_CLOCK_EXTTS; - event.index = 1; - event.timestamp = ((u64) hi) << 32; - event.timestamp |= lo; - ptp_clock_event(qoriq_ptp->clock, &event); + extts_clean_up(qoriq_ptp, 1, true); } - if (val & ALM2) { + if (irqs & ALM2) { ack |= ALM2; if (qoriq_ptp->alarm_value) { event.type = PTP_CLOCK_ALARM; @@ -136,13 +174,10 @@ static irqreturn_t isr(int irq, void *priv) ns = qoriq_ptp->alarm_value + qoriq_ptp->alarm_interval; hi = ns >> 32; lo = ns & 0xffffffff; - spin_lock(&qoriq_ptp->lock); qoriq_write(®s->alarm_regs->tmr_alarm2_l, lo); qoriq_write(®s->alarm_regs->tmr_alarm2_h, hi); - spin_unlock(&qoriq_ptp->lock); qoriq_ptp->alarm_value = ns; } else { - qoriq_write(®s->ctrl_regs->tmr_tevent, ALM2); spin_lock(&qoriq_ptp->lock); mask = qoriq_read(®s->ctrl_regs->tmr_temask); mask &= ~ALM2EN; @@ -153,7 +188,7 @@ static irqreturn_t isr(int irq, void *priv) } } - if (val & PP1) { + if (irqs & PP1) { ack |= PP1; event.type = PTP_CLOCK_PPS; ptp_clock_event(qoriq_ptp->clock, &event); @@ -260,7 +295,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps); struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; unsigned long flags; - u32 bit, mask; + u32 bit, mask = 0; switch (rq->type) { case PTP_CLK_REQ_EXTTS: @@ -274,32 +309,32 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp, default: return -EINVAL; } - spin_lock_irqsave(&qoriq_ptp->lock, flags); - mask = qoriq_read(®s->ctrl_regs->tmr_temask); - if (on) - mask |= bit; - else - mask &= ~bit; - qoriq_write(®s->ctrl_regs->tmr_temask, mask); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); - return 0; - case PTP_CLK_REQ_PPS: - spin_lock_irqsave(&qoriq_ptp->lock, flags); - mask = qoriq_read(®s->ctrl_regs->tmr_temask); if (on) - mask |= PP1EN; - else - mask &= ~PP1EN; - qoriq_write(®s->ctrl_regs->tmr_temask, mask); - spin_unlock_irqrestore(&qoriq_ptp->lock, flags); - return 0; + extts_clean_up(qoriq_ptp, rq->extts.index, false); - default: break; + case PTP_CLK_REQ_PPS: + bit = PP1EN; + break; + default: + return -EOPNOTSUPP; } - return -EOPNOTSUPP; + spin_lock_irqsave(&qoriq_ptp->lock, flags); + + mask = qoriq_read(®s->ctrl_regs->tmr_temask); + if (on) { + mask |= bit; + qoriq_write(®s->ctrl_regs->tmr_tevent, bit); + } else { + mask &= ~bit; + } + + qoriq_write(®s->ctrl_regs->tmr_temask, mask); + + spin_unlock_irqrestore(&qoriq_ptp->lock, flags); + return 0; } static const struct ptp_clock_info ptp_qoriq_caps = { @@ -436,11 +471,17 @@ static int qoriq_ptp_probe(struct platform_device *dev) err = -EINVAL; + qoriq_ptp->dev = &dev->dev; qoriq_ptp->caps = ptp_qoriq_caps; if (of_property_read_u32(node, "fsl,cksel", &qoriq_ptp->cksel)) qoriq_ptp->cksel = DEFAULT_CKSEL; + if (of_property_read_bool(node, "fsl,extts-fifo")) + qoriq_ptp->extts_fifo_support = true; + else + qoriq_ptp->extts_fifo_support = false; + if (of_property_read_u32(node, "fsl,tclk-period", &qoriq_ptp->tclk_period) || of_property_read_u32(node, @@ -532,6 +573,7 @@ static int qoriq_ptp_probe(struct platform_device *dev) } qoriq_ptp->phc_index = ptp_clock_index(qoriq_ptp->clock); + ptp_qoriq_create_debugfs(qoriq_ptp); platform_set_drvdata(dev, qoriq_ptp); return 0; @@ -557,6 +599,7 @@ static int qoriq_ptp_remove(struct platform_device *dev) qoriq_write(®s->ctrl_regs->tmr_temask, 0); qoriq_write(®s->ctrl_regs->tmr_ctrl, 0); + ptp_qoriq_remove_debugfs(qoriq_ptp); ptp_clock_unregister(qoriq_ptp->clock); iounmap(qoriq_ptp->base); release_resource(qoriq_ptp->rsrc); diff --git a/drivers/ptp/ptp_qoriq_debugfs.c b/drivers/ptp/ptp_qoriq_debugfs.c new file mode 100644 index 000000000000..970595021088 --- /dev/null +++ b/drivers/ptp/ptp_qoriq_debugfs.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright 2019 NXP + */ +#include +#include +#include + +static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val) +{ + struct qoriq_ptp *qoriq_ptp = data; + struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + u32 ctrl; + + ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + *val = ctrl & PP1L ? 1 : 0; + + return 0; +} + +static int ptp_qoriq_fiper1_lpbk_set(void *data, u64 val) +{ + struct qoriq_ptp *qoriq_ptp = data; + struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + u32 ctrl; + + ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + if (val == 0) + ctrl &= ~PP1L; + else + ctrl |= PP1L; + + qoriq_write(®s->ctrl_regs->tmr_ctrl, ctrl); + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ptp_qoriq_fiper1_fops, ptp_qoriq_fiper1_lpbk_get, + ptp_qoriq_fiper1_lpbk_set, "%llu\n"); + +static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val) +{ + struct qoriq_ptp *qoriq_ptp = data; + struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + u32 ctrl; + + ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + *val = ctrl & PP2L ? 1 : 0; + + return 0; +} + +static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val) +{ + struct qoriq_ptp *qoriq_ptp = data; + struct qoriq_ptp_registers *regs = &qoriq_ptp->regs; + u32 ctrl; + + ctrl = qoriq_read(®s->ctrl_regs->tmr_ctrl); + if (val == 0) + ctrl &= ~PP2L; + else + ctrl |= PP2L; + + qoriq_write(®s->ctrl_regs->tmr_ctrl, ctrl); + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ptp_qoriq_fiper2_fops, ptp_qoriq_fiper2_lpbk_get, + ptp_qoriq_fiper2_lpbk_set, "%llu\n"); + +void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp) +{ + struct dentry *root; + + root = debugfs_create_dir(dev_name(qoriq_ptp->dev), NULL); + if (IS_ERR(root)) + return; + if (!root) + goto err_root; + + qoriq_ptp->debugfs_root = root; + + if (!debugfs_create_file_unsafe("fiper1-loopback", 0600, root, + qoriq_ptp, &ptp_qoriq_fiper1_fops)) + goto err_node; + if (!debugfs_create_file_unsafe("fiper2-loopback", 0600, root, + qoriq_ptp, &ptp_qoriq_fiper2_fops)) + goto err_node; + return; + +err_node: + debugfs_remove_recursive(root); + qoriq_ptp->debugfs_root = NULL; +err_root: + dev_err(qoriq_ptp->dev, "failed to initialize debugfs\n"); +} + +void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp) +{ + debugfs_remove_recursive(qoriq_ptp->debugfs_root); + qoriq_ptp->debugfs_root = NULL; +} diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 183fc42a510a..2d7cd344f3bf 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -153,10 +153,15 @@ static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs, const bool * ctx, struct irq_affinity *desc) { - int i, ret; + int i, ret, queue_idx = 0; for (i = 0; i < nvqs; ++i) { - vqs[i] = rp_find_vq(vdev, i, callbacks[i], names[i], + if (!names[i]) { + vqs[i] = NULL; + continue; + } + + vqs[i] = rp_find_vq(vdev, queue_idx++, callbacks[i], names[i], ctx ? ctx[i] : false); if (IS_ERR(vqs[i])) { ret = PTR_ERR(vqs[i]); diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 194ffd5c8580..039b2074db7e 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -60,7 +60,9 @@ static void sclp_cpu_capability_notify(struct work_struct *work) static void __ref sclp_cpu_change_notify(struct work_struct *work) { + lock_device_hotplug(); smp_rescan_cpus(); + unlock_device_hotplug(); } static void sclp_conf_receiver_fn(struct evbuf_header *evbuf) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 0ee026947f20..d65650ef6b41 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -741,11 +741,6 @@ struct qeth_discipline { struct qeth_ipa_cmd *cmd); }; -struct qeth_vlan_vid { - struct list_head list; - unsigned short vid; -}; - enum qeth_addr_disposition { QETH_DISP_ADDR_DELETE = 0, QETH_DISP_ADDR_DO_NOTHING = 1, @@ -792,8 +787,6 @@ struct qeth_card { wait_queue_head_t wait_q; spinlock_t mclock; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; - struct mutex vid_list_mutex; /* vid_list */ - struct list_head vid_list; DECLARE_HASHTABLE(mac_htable, 4); DECLARE_HASHTABLE(ip_htable, 4); DECLARE_HASHTABLE(ip_mc_htable, 4); @@ -802,7 +795,6 @@ struct qeth_card { unsigned long thread_start_mask; unsigned long thread_allowed_mask; unsigned long thread_running_mask; - struct task_struct *recovery_task; spinlock_t ip_lock; struct qeth_ipato ipato; struct list_head cmd_waiter_list; @@ -976,11 +968,8 @@ extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS]; struct net_device *qeth_clone_netdev(struct net_device *orig); struct qeth_card *qeth_get_card_by_busid(char *bus_id); -void qeth_set_recovery_task(struct qeth_card *); -void qeth_clear_recovery_task(struct qeth_card *); void qeth_set_allowed_threads(struct qeth_card *, unsigned long , int); int qeth_threads_running(struct qeth_card *, unsigned long); -int qeth_wait_for_threads(struct qeth_card *, unsigned long); int qeth_do_run_thread(struct qeth_card *, unsigned long); void qeth_clear_thread_start_bit(struct qeth_card *, unsigned long); void qeth_clear_thread_running_bit(struct qeth_card *, unsigned long); @@ -1047,6 +1036,9 @@ netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); netdev_features_t qeth_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); +int qeth_open(struct net_device *dev); +int qeth_stop(struct net_device *dev); + int qeth_vm_request_mac(struct qeth_card *card); int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, struct qeth_qdio_out_q *queue, int ipv, int cast_type, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index e63e03143ca7..dcc06e48b70b 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -193,23 +193,6 @@ const char *qeth_get_cardname_short(struct qeth_card *card) return "n/a"; } -void qeth_set_recovery_task(struct qeth_card *card) -{ - card->recovery_task = current; -} -EXPORT_SYMBOL_GPL(qeth_set_recovery_task); - -void qeth_clear_recovery_task(struct qeth_card *card) -{ - card->recovery_task = NULL; -} -EXPORT_SYMBOL_GPL(qeth_clear_recovery_task); - -static bool qeth_is_recovery_task(const struct qeth_card *card) -{ - return card->recovery_task == current; -} - void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads, int clear_start_mask) { @@ -236,15 +219,6 @@ int qeth_threads_running(struct qeth_card *card, unsigned long threads) } EXPORT_SYMBOL_GPL(qeth_threads_running); -int qeth_wait_for_threads(struct qeth_card *card, unsigned long threads) -{ - if (qeth_is_recovery_task(card)) - return 0; - return wait_event_interruptible(card->wait_q, - qeth_threads_running(card, threads) == 0); -} -EXPORT_SYMBOL_GPL(qeth_wait_for_threads); - void qeth_clear_working_pool_list(struct qeth_card *card) { struct qeth_buffer_pool_entry *pool_entry, *tmp; @@ -1430,7 +1404,6 @@ static void qeth_setup_card(struct qeth_card *card) spin_lock_init(&card->thread_mask_lock); mutex_init(&card->conf_mutex); mutex_init(&card->discipline_mutex); - mutex_init(&card->vid_list_mutex); INIT_WORK(&card->kernel_thread_starter, qeth_start_kernel_thread); INIT_LIST_HEAD(&card->cmd_waiter_list); init_waitqueue_head(&card->wait_q); @@ -3558,8 +3531,6 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, unsigned int qdio_err, card->qdio.c_q->next_buf_to_init = (card->qdio.c_q->next_buf_to_init + count) % QDIO_MAX_BUFFERS_PER_Q; - netif_wake_queue(card->dev); - if (card->options.performance_stats) { int delta_t = qeth_get_micros(); delta_t -= card->perf_stats.cq_start_time; @@ -3928,7 +3899,6 @@ static int qeth_fill_buffer(struct qeth_qdio_out_q *queue, { struct qdio_buffer *buffer = buf->buffer; bool is_first_elem = true; - int flush_cnt = 0; __skb_queue_tail(&buf->skb_list, skb); @@ -3949,24 +3919,22 @@ static int qeth_fill_buffer(struct qeth_qdio_out_q *queue, if (!queue->do_pack) { QETH_CARD_TEXT(queue->card, 6, "fillbfnp"); - /* set state to PRIMED -> will be flushed */ - atomic_set(&buf->state, QETH_QDIO_BUF_PRIMED); - flush_cnt = 1; } else { QETH_CARD_TEXT(queue->card, 6, "fillbfpa"); if (queue->card->options.performance_stats) queue->card->perf_stats.skbs_sent_pack++; - if (buf->next_element_to_fill >= - QETH_MAX_BUFFER_ELEMENTS(queue->card)) { - /* - * packed buffer if full -> set state PRIMED - * -> will be flushed - */ - atomic_set(&buf->state, QETH_QDIO_BUF_PRIMED); - flush_cnt = 1; - } + + /* If the buffer still has free elements, keep using it. */ + if (buf->next_element_to_fill < + QETH_MAX_BUFFER_ELEMENTS(queue->card)) + return 0; } - return flush_cnt; + + /* flush out the buffer */ + atomic_set(&buf->state, QETH_QDIO_BUF_PRIMED); + queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) % + QDIO_MAX_BUFFERS_PER_Q; + return 1; } static int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, @@ -3982,7 +3950,6 @@ static int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, */ if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY) return -EBUSY; - queue->next_buf_to_fill = (index + 1) % QDIO_MAX_BUFFERS_PER_Q; qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len); qeth_flush_buffers(queue, index, 1); return 0; @@ -4040,10 +4007,9 @@ int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, } } } - tmp = qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len); - queue->next_buf_to_fill = (queue->next_buf_to_fill + tmp) % - QDIO_MAX_BUFFERS_PER_Q; - flush_count += tmp; + + flush_count += qeth_fill_buffer(queue, buffer, skb, hdr, offset, + hd_len); if (flush_count) qeth_flush_buffers(queue, start_index, flush_count); else if (!atomic_read(&queue->set_pci_flags_count)) @@ -5154,13 +5120,6 @@ retriable: *carrier_ok = true; } - if (qeth_netdev_is_registered(card->dev)) { - if (*carrier_ok) - netif_carrier_on(card->dev); - else - netif_carrier_off(card->dev); - } - card->options.ipa4.supported_funcs = 0; card->options.ipa6.supported_funcs = 0; card->options.adp.supported_funcs = 0; @@ -5937,9 +5896,6 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!card) return -ENODEV; - if (!qeth_card_hw_is_reachable(card)) - return -ENODEV; - if (card->info.type == QETH_CARD_TYPE_OSN) return -EPERM; @@ -6250,8 +6206,6 @@ int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev, /* Check if we can obtain more accurate information. */ /* If QUERY_CARD_INFO command is not supported or fails, */ /* just return the heuristics that was filled above. */ - if (!qeth_card_hw_is_reachable(card)) - return -ENODEV; rc = qeth_query_card_info(card, &carrier_info); if (rc == -EOPNOTSUPP) /* for old hardware, return heuristic */ return 0; @@ -6534,8 +6488,6 @@ static int qeth_set_ipa_rx_csum(struct qeth_card *card, bool on) return (rc_ipv6) ? rc_ipv6 : rc_ipv4; } -#define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO | \ - NETIF_F_IPV6_CSUM | NETIF_F_TSO6) /** * qeth_enable_hw_features() - (Re-)Enable HW functions for device features * @dev: a net_device @@ -6545,17 +6497,20 @@ void qeth_enable_hw_features(struct net_device *dev) struct qeth_card *card = dev->ml_priv; netdev_features_t features; - rtnl_lock(); features = dev->features; - /* force-off any feature that needs an IPA sequence. + /* force-off any feature that might need an IPA sequence. * netdev_update_features() will restart them. */ - dev->features &= ~QETH_HW_FEATURES; + dev->features &= ~dev->hw_features; + /* toggle VLAN filter, so that VIDs are re-programmed: */ + if (IS_LAYER2(card) && IS_VM_NIC(card)) { + dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + dev->wanted_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + } netdev_update_features(dev); if (features != dev->features) dev_warn(&card->gdev->dev, "Device recovery failed to restore all offload features\n"); - rtnl_unlock(); } EXPORT_SYMBOL_GPL(qeth_enable_hw_features); @@ -6624,10 +6579,7 @@ netdev_features_t qeth_fix_features(struct net_device *dev, features &= ~NETIF_F_TSO; if (!qeth_is_supported6(card, IPA_OUTBOUND_TSO)) features &= ~NETIF_F_TSO6; - /* if the card isn't up, remove features that require hw changes */ - if (card->state == CARD_STATE_DOWN || - card->state == CARD_STATE_RECOVER) - features &= ~QETH_HW_FEATURES; + QETH_DBF_HEX(SETUP, 2, &features, sizeof(features)); return features; } @@ -6659,6 +6611,46 @@ netdev_features_t qeth_features_check(struct sk_buff *skb, } EXPORT_SYMBOL_GPL(qeth_features_check); +int qeth_open(struct net_device *dev) +{ + struct qeth_card *card = dev->ml_priv; + + QETH_CARD_TEXT(card, 4, "qethopen"); + if (card->state == CARD_STATE_UP) + return 0; + if (card->state != CARD_STATE_SOFTSETUP) + return -ENODEV; + + if (qdio_stop_irq(CARD_DDEV(card), 0) < 0) + return -EIO; + + card->data.state = CH_STATE_UP; + card->state = CARD_STATE_UP; + netif_start_queue(dev); + + napi_enable(&card->napi); + local_bh_disable(); + napi_schedule(&card->napi); + /* kick-start the NAPI softirq: */ + local_bh_enable(); + return 0; +} +EXPORT_SYMBOL_GPL(qeth_open); + +int qeth_stop(struct net_device *dev) +{ + struct qeth_card *card = dev->ml_priv; + + QETH_CARD_TEXT(card, 4, "qethstop"); + netif_tx_disable(dev); + if (card->state == CARD_STATE_UP) { + card->state = CARD_STATE_SOFTSETUP; + napi_disable(&card->napi); + } + return 0; +} +EXPORT_SYMBOL_GPL(qeth_stop); + static int __init qeth_core_init(void) { int rc; diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 1ab321926f64..c7fb14a61eed 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -81,6 +81,7 @@ enum qeth_card_types { #define IS_IQD(card) ((card)->info.type == QETH_CARD_TYPE_IQD) #define IS_OSD(card) ((card)->info.type == QETH_CARD_TYPE_OSD) +#define IS_OSM(card) ((card)->info.type == QETH_CARD_TYPE_OSM) #define IS_OSN(card) ((card)->info.type == QETH_CARD_TYPE_OSN) #define IS_VM_NIC(card) ((card)->info.guestlan) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index f108d4b44605..82f50cc30b0a 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -25,7 +25,6 @@ #include "qeth_l2.h" static int qeth_l2_set_offline(struct ccwgroup_device *); -static int qeth_l2_stop(struct net_device *); static void qeth_bridgeport_query_support(struct qeth_card *card); static void qeth_bridge_state_change(struct qeth_card *card, struct qeth_ipa_cmd *cmd); @@ -98,8 +97,7 @@ static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac) rc = qeth_l2_send_setdelmac(card, mac, IPA_CMD_SETVMAC); if (rc == 0) { dev_info(&card->gdev->dev, - "MAC address %pM successfully registered on device %s\n", - mac, card->dev->name); + "MAC address %pM successfully registered\n", mac); } else { switch (rc) { case -EEXIST: @@ -263,75 +261,28 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i, qeth_l2_send_setdelvlan_cb, NULL)); } -static void qeth_l2_process_vlans(struct qeth_card *card) -{ - struct qeth_vlan_vid *id; - - QETH_CARD_TEXT(card, 3, "L2prcvln"); - mutex_lock(&card->vid_list_mutex); - list_for_each_entry(id, &card->vid_list, list) { - qeth_l2_send_setdelvlan(card, id->vid, IPA_CMD_SETVLAN); - } - mutex_unlock(&card->vid_list_mutex); -} - static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct qeth_card *card = dev->ml_priv; - struct qeth_vlan_vid *id; - int rc; QETH_CARD_TEXT_(card, 4, "aid:%d", vid); if (!vid) return 0; - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "aidREC"); - return 0; - } - id = kmalloc(sizeof(*id), GFP_KERNEL); - if (id) { - id->vid = vid; - rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_SETVLAN); - if (rc) { - kfree(id); - return rc; - } - mutex_lock(&card->vid_list_mutex); - list_add_tail(&id->list, &card->vid_list); - mutex_unlock(&card->vid_list_mutex); - } else { - return -ENOMEM; - } - return 0; + + return qeth_l2_send_setdelvlan(card, vid, IPA_CMD_SETVLAN); } static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { - struct qeth_vlan_vid *id, *tmpid = NULL; struct qeth_card *card = dev->ml_priv; - int rc = 0; QETH_CARD_TEXT_(card, 4, "kid:%d", vid); - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "kidREC"); + if (!vid) return 0; - } - mutex_lock(&card->vid_list_mutex); - list_for_each_entry(id, &card->vid_list, list) { - if (id->vid == vid) { - list_del(&id->list); - tmpid = id; - break; - } - } - mutex_unlock(&card->vid_list_mutex); - if (tmpid) { - rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_DELVLAN); - kfree(tmpid); - } - return rc; + + return qeth_l2_send_setdelvlan(card, vid, IPA_CMD_DELVLAN); } static void qeth_l2_stop_card(struct qeth_card *card, int recovery_mode) @@ -343,9 +294,8 @@ static void qeth_l2_stop_card(struct qeth_card *card, int recovery_mode) if (card->read.state == CH_STATE_UP && card->write.state == CH_STATE_UP && (card->state == CARD_STATE_UP)) { - if (recovery_mode && - card->info.type != QETH_CARD_TYPE_OSN) { - qeth_l2_stop(card->dev); + if (recovery_mode && !IS_OSN(card)) { + qeth_stop(card->dev); } else { rtnl_lock(); dev_close(card->dev); @@ -460,6 +410,26 @@ out: return 0; } +static void qeth_l2_register_dev_addr(struct qeth_card *card) +{ + if (!is_valid_ether_addr(card->dev->dev_addr)) + qeth_l2_request_initial_mac(card); + + if (!IS_OSN(card) && !qeth_l2_send_setmac(card, card->dev->dev_addr)) + card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; +} + +static int qeth_l2_validate_addr(struct net_device *dev) +{ + struct qeth_card *card = dev->ml_priv; + + if (IS_OSN(card) || (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) + return eth_validate_addr(dev); + + QETH_CARD_TEXT(card, 4, "nomacadr"); + return -EPERM; +} + static int qeth_l2_set_mac_address(struct net_device *dev, void *p) { struct sockaddr *addr = p; @@ -479,39 +449,22 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "setmcREC"); - return -ERESTARTSYS; - } - - /* avoid racing against concurrent state change: */ - if (!mutex_trylock(&card->conf_mutex)) - return -EAGAIN; - - if (!qeth_card_hw_is_reachable(card)) { - ether_addr_copy(dev->dev_addr, addr->sa_data); - goto out_unlock; - } - /* don't register the same address twice */ if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) && (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) - goto out_unlock; + return 0; /* add the new address, switch over, drop the old */ rc = qeth_l2_send_setmac(card, addr->sa_data); if (rc) - goto out_unlock; + return rc; ether_addr_copy(old_addr, dev->dev_addr); ether_addr_copy(dev->dev_addr, addr->sa_data); if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED) qeth_l2_remove_mac(card, old_addr); card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; - -out_unlock: - mutex_unlock(&card->conf_mutex); - return rc; + return 0; } static void qeth_promisc_to_bridge(struct qeth_card *card) @@ -586,9 +539,6 @@ static void qeth_l2_set_rx_mode(struct net_device *dev) return; QETH_CARD_TEXT(card, 3, "setmulti"); - if (qeth_threads_running(card, QETH_RECOVER_THREAD) && - (card->state != CARD_STATE_UP)) - return; spin_lock_bh(&card->mclock); @@ -712,62 +662,6 @@ tx_drop: return NETDEV_TX_OK; } -static int __qeth_l2_open(struct net_device *dev) -{ - struct qeth_card *card = dev->ml_priv; - int rc = 0; - - QETH_CARD_TEXT(card, 4, "qethopen"); - if (card->state == CARD_STATE_UP) - return rc; - if (card->state != CARD_STATE_SOFTSETUP) - return -ENODEV; - - if ((card->info.type != QETH_CARD_TYPE_OSN) && - (!(card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED))) { - QETH_CARD_TEXT(card, 4, "nomacadr"); - return -EPERM; - } - card->data.state = CH_STATE_UP; - card->state = CARD_STATE_UP; - netif_start_queue(dev); - - if (qdio_stop_irq(card->data.ccwdev, 0) >= 0) { - napi_enable(&card->napi); - local_bh_disable(); - napi_schedule(&card->napi); - /* kick-start the NAPI softirq: */ - local_bh_enable(); - } else - rc = -EIO; - return rc; -} - -static int qeth_l2_open(struct net_device *dev) -{ - struct qeth_card *card = dev->ml_priv; - - QETH_CARD_TEXT(card, 5, "qethope_"); - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "openREC"); - return -ERESTARTSYS; - } - return __qeth_l2_open(dev); -} - -static int qeth_l2_stop(struct net_device *dev) -{ - struct qeth_card *card = dev->ml_priv; - - QETH_CARD_TEXT(card, 4, "qethstop"); - netif_tx_disable(dev); - if (card->state == CARD_STATE_UP) { - card->state = CARD_STATE_SOFTSETUP; - napi_disable(&card->napi); - } - return 0; -} - static const struct device_type qeth_l2_devtype = { .name = "qeth_layer2", .groups = qeth_l2_attr_groups, @@ -783,7 +677,7 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev) if (rc) return rc; } - INIT_LIST_HEAD(&card->vid_list); + hash_init(card->mac_htable); card->info.hwtrap = 0; qeth_l2_vnicc_set_defaults(card); @@ -822,12 +716,12 @@ static const struct ethtool_ops qeth_l2_osn_ops = { }; static const struct net_device_ops qeth_l2_netdev_ops = { - .ndo_open = qeth_l2_open, - .ndo_stop = qeth_l2_stop, + .ndo_open = qeth_open, + .ndo_stop = qeth_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l2_hard_start_xmit, .ndo_features_check = qeth_features_check, - .ndo_validate_addr = eth_validate_addr, + .ndo_validate_addr = qeth_l2_validate_addr, .ndo_set_rx_mode = qeth_l2_set_rx_mode, .ndo_do_ioctl = qeth_do_ioctl, .ndo_set_mac_address = qeth_l2_set_mac_address, @@ -842,9 +736,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) { int rc; - if (qeth_netdev_is_registered(card->dev)) - return 0; - card->dev->priv_flags |= IFF_UNICAST_FLT; card->dev->netdev_ops = &qeth_l2_netdev_ops; if (card->info.type == QETH_CARD_TYPE_OSN) { @@ -855,10 +746,13 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) card->dev->needed_headroom = sizeof(struct qeth_hdr); } - if (card->info.type == QETH_CARD_TYPE_OSM) + if (IS_OSM(card)) { card->dev->features |= NETIF_F_VLAN_CHALLENGED; - else + } else { + if (!IS_VM_NIC(card)) + card->dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + } if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { card->dev->features |= NETIF_F_SG; @@ -892,8 +786,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok) PAGE_SIZE * (QDIO_MAX_ELEMENTS_PER_BUFFER - 1)); } - if (!is_valid_ether_addr(card->dev->dev_addr)) - qeth_l2_request_initial_mac(card); netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); rc = register_netdev(card->dev); if (!rc && carrier_ok) @@ -927,6 +819,7 @@ static void qeth_l2_trace_features(struct qeth_card *card) static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + struct net_device *dev = card->dev; int rc = 0; enum qeth_card_states recover_flag; bool carrier_ok; @@ -948,13 +841,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) dev_info(&card->gdev->dev, "The device represents a Bridge Capable Port\n"); - rc = qeth_l2_setup_netdev(card, carrier_ok); - if (rc) - goto out_remove; - - if (card->info.type != QETH_CARD_TYPE_OSN && - !qeth_l2_send_setmac(card, card->dev->dev_addr)) - card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; + qeth_l2_register_dev_addr(card); if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) { if (card->info.hwtrap && @@ -984,11 +871,6 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) goto out_remove; } - if (card->info.type != QETH_CARD_TYPE_OSN) - qeth_l2_process_vlans(card); - - netif_tx_disable(card->dev); - rc = qeth_init_qdio_queues(card); if (rc) { QETH_DBF_TEXT_(SETUP, 2, "6err%d", rc); @@ -999,17 +881,31 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) qeth_set_allowed_threads(card, 0xffffffff, 0); - qeth_enable_hw_features(card->dev); - if (recover_flag == CARD_STATE_RECOVER) { - if (recovery_mode && - card->info.type != QETH_CARD_TYPE_OSN) { - __qeth_l2_open(card->dev); - qeth_l2_set_rx_mode(card->dev); - } else { - rtnl_lock(); - dev_open(card->dev, NULL); - rtnl_unlock(); + if (!qeth_netdev_is_registered(dev)) { + rc = qeth_l2_setup_netdev(card, carrier_ok); + if (rc) + goto out_remove; + } else { + rtnl_lock(); + if (carrier_ok) + netif_carrier_on(dev); + else + netif_carrier_off(dev); + + netif_device_attach(dev); + qeth_enable_hw_features(dev); + + if (recover_flag == CARD_STATE_RECOVER) { + if (recovery_mode && !IS_OSN(card)) { + if (!qeth_l2_validate_addr(dev)) { + qeth_open(dev); + qeth_l2_set_rx_mode(dev); + } + } else { + dev_open(dev, NULL); + } } + rtnl_unlock(); } /* let user_space know that device is online */ kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE); @@ -1049,7 +945,11 @@ static int __qeth_l2_set_offline(struct ccwgroup_device *cgdev, QETH_DBF_TEXT(SETUP, 3, "setoffl"); QETH_DBF_HEX(SETUP, 3, &card, sizeof(void *)); + rtnl_lock(); + netif_device_detach(card->dev); netif_carrier_off(card->dev); + rtnl_unlock(); + recover_flag = card->state; if ((!recovery_mode && card->info.hwtrap) || card->info.hwtrap == 2) { qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM); @@ -1090,7 +990,6 @@ static int qeth_l2_recover(void *ptr) QETH_CARD_TEXT(card, 2, "recover2"); dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); - qeth_set_recovery_task(card); __qeth_l2_set_offline(card->gdev, 1); rc = __qeth_l2_set_online(card->gdev, 1); if (!rc) @@ -1101,7 +1000,6 @@ static int qeth_l2_recover(void *ptr) dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); } - qeth_clear_recovery_task(card); qeth_clear_thread_start_bit(card, QETH_RECOVER_THREAD); qeth_clear_thread_running_bit(card, QETH_RECOVER_THREAD); return 0; @@ -1122,7 +1020,6 @@ static int qeth_l2_pm_suspend(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); - netif_device_detach(card->dev); qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); if (gdev->state == CCWGROUP_OFFLINE) @@ -1152,7 +1049,6 @@ static int qeth_l2_pm_resume(struct ccwgroup_device *gdev) rc = __qeth_l2_set_online(card->gdev, 0); qeth_set_allowed_threads(card, 0xffffffff, 0); - netif_device_attach(card->dev); if (rc) dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 42a7cdc59b76..59535ecb1487 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -40,7 +40,6 @@ static int qeth_l3_set_offline(struct ccwgroup_device *); -static int qeth_l3_stop(struct net_device *); static void qeth_l3_set_rx_mode(struct net_device *dev); static int qeth_l3_register_addr_entry(struct qeth_card *, struct qeth_ipaddr *); @@ -1281,10 +1280,6 @@ static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev, QETH_CARD_TEXT_(card, 4, "kid:%d", vid); - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "kidREC"); - return 0; - } clear_bit(vid, card->active_vlans); qeth_l3_set_rx_mode(dev); return 0; @@ -1410,7 +1405,7 @@ static void qeth_l3_stop_card(struct qeth_card *card, int recovery_mode) card->write.state == CH_STATE_UP && (card->state == CARD_STATE_UP)) { if (recovery_mode) - qeth_l3_stop(card->dev); + qeth_stop(card->dev); else { rtnl_lock(); dev_close(card->dev); @@ -1473,9 +1468,7 @@ static void qeth_l3_set_rx_mode(struct net_device *dev) int i, rc; QETH_CARD_TEXT(card, 3, "setmulti"); - if (qeth_threads_running(card, QETH_RECOVER_THREAD) && - (card->state != CARD_STATE_UP)) - return; + if (!card->options.sniffer) { spin_lock_bh(&card->mclock); @@ -2100,56 +2093,6 @@ tx_drop: return NETDEV_TX_OK; } -static int __qeth_l3_open(struct net_device *dev) -{ - struct qeth_card *card = dev->ml_priv; - int rc = 0; - - QETH_CARD_TEXT(card, 4, "qethopen"); - if (card->state == CARD_STATE_UP) - return rc; - if (card->state != CARD_STATE_SOFTSETUP) - return -ENODEV; - card->data.state = CH_STATE_UP; - card->state = CARD_STATE_UP; - netif_start_queue(dev); - - if (qdio_stop_irq(card->data.ccwdev, 0) >= 0) { - napi_enable(&card->napi); - local_bh_disable(); - napi_schedule(&card->napi); - /* kick-start the NAPI softirq: */ - local_bh_enable(); - } else - rc = -EIO; - return rc; -} - -static int qeth_l3_open(struct net_device *dev) -{ - struct qeth_card *card = dev->ml_priv; - - QETH_CARD_TEXT(card, 5, "qethope_"); - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "openREC"); - return -ERESTARTSYS; - } - return __qeth_l3_open(dev); -} - -static int qeth_l3_stop(struct net_device *dev) -{ - struct qeth_card *card = dev->ml_priv; - - QETH_CARD_TEXT(card, 4, "qethstop"); - netif_tx_disable(dev); - if (card->state == CARD_STATE_UP) { - card->state = CARD_STATE_SOFTSETUP; - napi_disable(&card->napi); - } - return 0; -} - static const struct ethtool_ops qeth_l3_ethtool_ops = { .get_link = ethtool_op_get_link, .get_strings = qeth_core_get_strings, @@ -2193,8 +2136,8 @@ static netdev_features_t qeth_l3_osa_features_check(struct sk_buff *skb, } static const struct net_device_ops qeth_l3_netdev_ops = { - .ndo_open = qeth_l3_open, - .ndo_stop = qeth_l3_stop, + .ndo_open = qeth_open, + .ndo_stop = qeth_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l3_hard_start_xmit, .ndo_validate_addr = eth_validate_addr, @@ -2208,8 +2151,8 @@ static const struct net_device_ops qeth_l3_netdev_ops = { }; static const struct net_device_ops qeth_l3_osa_netdev_ops = { - .ndo_open = qeth_l3_open, - .ndo_stop = qeth_l3_stop, + .ndo_open = qeth_open, + .ndo_stop = qeth_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l3_hard_start_xmit, .ndo_features_check = qeth_l3_osa_features_check, @@ -2229,9 +2172,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok) unsigned int headroom; int rc; - if (qeth_netdev_is_registered(card->dev)) - return 0; - if (card->info.type == QETH_CARD_TYPE_OSD || card->info.type == QETH_CARD_TYPE_OSX) { if ((card->info.link_type == QETH_LINK_TYPE_LANE_TR) || @@ -2347,6 +2287,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + struct net_device *dev = card->dev; int rc = 0; enum qeth_card_states recover_flag; bool carrier_ok; @@ -2364,10 +2305,6 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) goto out_remove; } - rc = qeth_l3_setup_netdev(card, carrier_ok); - if (rc) - goto out_remove; - if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) { if (card->info.hwtrap && qeth_hw_trap(card, QETH_DIAGS_TRAP_ARM)) @@ -2397,7 +2334,6 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) if (rc) QETH_DBF_TEXT_(SETUP, 2, "5err%04x", rc); } - netif_tx_disable(card->dev); rc = qeth_init_qdio_queues(card); if (rc) { @@ -2410,14 +2346,27 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) qeth_set_allowed_threads(card, 0xffffffff, 0); qeth_l3_recover_ip(card); - qeth_enable_hw_features(card->dev); - if (recover_flag == CARD_STATE_RECOVER) { + if (!qeth_netdev_is_registered(dev)) { + rc = qeth_l3_setup_netdev(card, carrier_ok); + if (rc) + goto out_remove; + } else { rtnl_lock(); - if (recovery_mode) { - __qeth_l3_open(card->dev); - qeth_l3_set_rx_mode(card->dev); - } else { - dev_open(card->dev, NULL); + if (carrier_ok) + netif_carrier_on(dev); + else + netif_carrier_off(dev); + + netif_device_attach(dev); + qeth_enable_hw_features(dev); + + if (recover_flag == CARD_STATE_RECOVER) { + if (recovery_mode) { + qeth_open(dev); + qeth_l3_set_rx_mode(dev); + } else { + dev_open(dev, NULL); + } } rtnl_unlock(); } @@ -2459,7 +2408,11 @@ static int __qeth_l3_set_offline(struct ccwgroup_device *cgdev, QETH_DBF_TEXT(SETUP, 3, "setoffl"); QETH_DBF_HEX(SETUP, 3, &card, sizeof(void *)); + rtnl_lock(); + netif_device_detach(card->dev); netif_carrier_off(card->dev); + rtnl_unlock(); + recover_flag = card->state; if ((!recovery_mode && card->info.hwtrap) || card->info.hwtrap == 2) { qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM); @@ -2506,7 +2459,6 @@ static int qeth_l3_recover(void *ptr) QETH_CARD_TEXT(card, 2, "recover2"); dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); - qeth_set_recovery_task(card); __qeth_l3_set_offline(card->gdev, 1); rc = __qeth_l3_set_online(card->gdev, 1); if (!rc) @@ -2517,7 +2469,6 @@ static int qeth_l3_recover(void *ptr) dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); } - qeth_clear_recovery_task(card); qeth_clear_thread_start_bit(card, QETH_RECOVER_THREAD); qeth_clear_thread_running_bit(card, QETH_RECOVER_THREAD); return 0; @@ -2527,7 +2478,6 @@ static int qeth_l3_pm_suspend(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); - netif_device_detach(card->dev); qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); if (gdev->state == CCWGROUP_OFFLINE) @@ -2557,7 +2507,6 @@ static int qeth_l3_pm_resume(struct ccwgroup_device *gdev) rc = __qeth_l3_set_online(card->gdev, 0); qeth_set_allowed_threads(card, 0xffffffff, 0); - netif_device_attach(card->dev); if (rc) dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index fc9dbad476c0..ae1d56da671d 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -635,7 +635,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, { struct virtio_ccw_device *vcdev = to_vc_device(vdev); unsigned long *indicatorp = NULL; - int ret, i; + int ret, i, queue_idx = 0; struct ccw1 *ccw; ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL); @@ -643,8 +643,14 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, return -ENOMEM; for (i = 0; i < nvqs; ++i) { - vqs[i] = virtio_ccw_setup_vq(vdev, i, callbacks[i], names[i], - ctx ? ctx[i] : false, ccw); + if (!names[i]) { + vqs[i] = NULL; + continue; + } + + vqs[i] = virtio_ccw_setup_vq(vdev, queue_idx++, callbacks[i], + names[i], ctx ? ctx[i] : false, + ccw); if (IS_ERR(vqs[i])) { ret = PTR_ERR(vqs[i]); vqs[i] = NULL; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 634ddb90e7aa..7e56a11836c1 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1747,11 +1747,10 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) shost->max_sectors = (shost->sg_tablesize * 8) + 112; } - error = dma_set_max_seg_size(&pdev->dev, - (aac->adapter_info.options & AAC_OPT_NEW_COMM) ? - (shost->max_sectors << 9) : 65536); - if (error) - goto out_deinit; + if (aac->adapter_info.options & AAC_OPT_NEW_COMM) + shost->max_segment_size = shost->max_sectors << 9; + else + shost->max_segment_size = 65536; /* * Firmware printf works only with older firmware. diff --git a/drivers/scsi/csiostor/csio_attr.c b/drivers/scsi/csiostor/csio_attr.c index 8a004036e3d7..9bd2bd8dc2be 100644 --- a/drivers/scsi/csiostor/csio_attr.c +++ b/drivers/scsi/csiostor/csio_attr.c @@ -594,12 +594,12 @@ csio_vport_create(struct fc_vport *fc_vport, bool disable) } fc_vport_set_state(fc_vport, FC_VPORT_INITIALIZING); + ln->fc_vport = fc_vport; if (csio_fcoe_alloc_vnp(hw, ln)) goto error; *(struct csio_lnode **)fc_vport->dd_data = ln; - ln->fc_vport = fc_vport; if (!fc_vport->node_name) fc_vport->node_name = wwn_to_u64(csio_ln_wwnn(ln)); if (!fc_vport->port_name) diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c index 8a20411699d9..75e1273a44b3 100644 --- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c +++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c @@ -1144,7 +1144,7 @@ static void ddp_clear_map(struct cxgbi_device *cdev, struct cxgbi_ppm *ppm, } static int ddp_setup_conn_pgidx(struct cxgbi_sock *csk, - unsigned int tid, int pg_idx, bool reply) + unsigned int tid, int pg_idx) { struct sk_buff *skb = alloc_wr(sizeof(struct cpl_set_tcb_field), 0, GFP_KERNEL); @@ -1160,7 +1160,7 @@ static int ddp_setup_conn_pgidx(struct cxgbi_sock *csk, req = (struct cpl_set_tcb_field *)skb->head; req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); - req->reply = V_NO_REPLY(reply ? 0 : 1); + req->reply = V_NO_REPLY(1); req->cpu_idx = 0; req->word = htons(31); req->mask = cpu_to_be64(0xF0000000); @@ -1177,11 +1177,10 @@ static int ddp_setup_conn_pgidx(struct cxgbi_sock *csk, * @tid: connection id * @hcrc: header digest enabled * @dcrc: data digest enabled - * @reply: request reply from h/w * set up the iscsi digest settings for a connection identified by tid */ static int ddp_setup_conn_digest(struct cxgbi_sock *csk, unsigned int tid, - int hcrc, int dcrc, int reply) + int hcrc, int dcrc) { struct sk_buff *skb = alloc_wr(sizeof(struct cpl_set_tcb_field), 0, GFP_KERNEL); @@ -1197,7 +1196,7 @@ static int ddp_setup_conn_digest(struct cxgbi_sock *csk, unsigned int tid, req = (struct cpl_set_tcb_field *)skb->head; req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); - req->reply = V_NO_REPLY(reply ? 0 : 1); + req->reply = V_NO_REPLY(1); req->cpu_idx = 0; req->word = htons(31); req->mask = cpu_to_be64(0x0F000000); diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index 49f8028ac524..d26f50af00ea 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -1548,16 +1548,22 @@ static void do_set_tcb_rpl(struct cxgbi_device *cdev, struct sk_buff *skb) struct cxgbi_sock *csk; csk = lookup_tid(t, tid); - if (!csk) + if (!csk) { pr_err("can't find conn. for tid %u.\n", tid); + return; + } log_debug(1 << CXGBI_DBG_TOE | 1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,%lx,%u, status 0x%x.\n", csk, csk->state, csk->flags, csk->tid, rpl->status); - if (rpl->status != CPL_ERR_NONE) + if (rpl->status != CPL_ERR_NONE) { pr_err("csk 0x%p,%u, SET_TCB_RPL status %u.\n", csk, tid, rpl->status); + csk->err = -EINVAL; + } + + complete(&csk->cmpl); __kfree_skb(skb); } @@ -1983,7 +1989,7 @@ static int ddp_set_map(struct cxgbi_ppm *ppm, struct cxgbi_sock *csk, } static int ddp_setup_conn_pgidx(struct cxgbi_sock *csk, unsigned int tid, - int pg_idx, bool reply) + int pg_idx) { struct sk_buff *skb; struct cpl_set_tcb_field *req; @@ -1999,7 +2005,7 @@ static int ddp_setup_conn_pgidx(struct cxgbi_sock *csk, unsigned int tid, req = (struct cpl_set_tcb_field *)skb->head; INIT_TP_WR(req, csk->tid); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, csk->tid)); - req->reply_ctrl = htons(NO_REPLY_V(reply) | QUEUENO_V(csk->rss_qid)); + req->reply_ctrl = htons(NO_REPLY_V(0) | QUEUENO_V(csk->rss_qid)); req->word_cookie = htons(0); req->mask = cpu_to_be64(0x3 << 8); req->val = cpu_to_be64(pg_idx << 8); @@ -2008,12 +2014,15 @@ static int ddp_setup_conn_pgidx(struct cxgbi_sock *csk, unsigned int tid, log_debug(1 << CXGBI_DBG_TOE | 1 << CXGBI_DBG_SOCK, "csk 0x%p, tid 0x%x, pg_idx %u.\n", csk, csk->tid, pg_idx); + reinit_completion(&csk->cmpl); cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb); - return 0; + wait_for_completion(&csk->cmpl); + + return csk->err; } static int ddp_setup_conn_digest(struct cxgbi_sock *csk, unsigned int tid, - int hcrc, int dcrc, int reply) + int hcrc, int dcrc) { struct sk_buff *skb; struct cpl_set_tcb_field *req; @@ -2031,7 +2040,7 @@ static int ddp_setup_conn_digest(struct cxgbi_sock *csk, unsigned int tid, req = (struct cpl_set_tcb_field *)skb->head; INIT_TP_WR(req, tid); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); - req->reply_ctrl = htons(NO_REPLY_V(reply) | QUEUENO_V(csk->rss_qid)); + req->reply_ctrl = htons(NO_REPLY_V(0) | QUEUENO_V(csk->rss_qid)); req->word_cookie = htons(0); req->mask = cpu_to_be64(0x3 << 4); req->val = cpu_to_be64(((hcrc ? ULP_CRC_HEADER : 0) | @@ -2041,8 +2050,11 @@ static int ddp_setup_conn_digest(struct cxgbi_sock *csk, unsigned int tid, log_debug(1 << CXGBI_DBG_TOE | 1 << CXGBI_DBG_SOCK, "csk 0x%p, tid 0x%x, crc %d,%d.\n", csk, csk->tid, hcrc, dcrc); + reinit_completion(&csk->cmpl); cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb); - return 0; + wait_for_completion(&csk->cmpl); + + return csk->err; } static struct cxgbi_ppm *cdev2ppm(struct cxgbi_device *cdev) diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index 75f876409fb9..245742557c03 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -573,6 +573,7 @@ static struct cxgbi_sock *cxgbi_sock_create(struct cxgbi_device *cdev) skb_queue_head_init(&csk->receive_queue); skb_queue_head_init(&csk->write_queue); timer_setup(&csk->retry_timer, NULL, 0); + init_completion(&csk->cmpl); rwlock_init(&csk->callback_lock); csk->cdev = cdev; csk->flags = 0; @@ -2251,14 +2252,14 @@ int cxgbi_set_conn_param(struct iscsi_cls_conn *cls_conn, if (!err && conn->hdrdgst_en) err = csk->cdev->csk_ddp_setup_digest(csk, csk->tid, conn->hdrdgst_en, - conn->datadgst_en, 0); + conn->datadgst_en); break; case ISCSI_PARAM_DATADGST_EN: err = iscsi_set_param(cls_conn, param, buf, buflen); if (!err && conn->datadgst_en) err = csk->cdev->csk_ddp_setup_digest(csk, csk->tid, conn->hdrdgst_en, - conn->datadgst_en, 0); + conn->datadgst_en); break; case ISCSI_PARAM_MAX_R2T: return iscsi_tcp_set_max_r2t(conn, buf); @@ -2384,7 +2385,7 @@ int cxgbi_bind_conn(struct iscsi_cls_session *cls_session, ppm = csk->cdev->cdev2ppm(csk->cdev); err = csk->cdev->csk_ddp_setup_pgidx(csk, csk->tid, - ppm->tformat.pgsz_idx_dflt, 0); + ppm->tformat.pgsz_idx_dflt); if (err < 0) return err; diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h index 5d5d8b50d842..1917ff57651d 100644 --- a/drivers/scsi/cxgbi/libcxgbi.h +++ b/drivers/scsi/cxgbi/libcxgbi.h @@ -149,6 +149,7 @@ struct cxgbi_sock { struct sk_buff_head receive_queue; struct sk_buff_head write_queue; struct timer_list retry_timer; + struct completion cmpl; int err; rwlock_t callback_lock; void *user_data; @@ -490,9 +491,9 @@ struct cxgbi_device { struct cxgbi_ppm *, struct cxgbi_task_tag_info *); int (*csk_ddp_setup_digest)(struct cxgbi_sock *, - unsigned int, int, int, int); + unsigned int, int, int); int (*csk_ddp_setup_pgidx)(struct cxgbi_sock *, - unsigned int, int, bool); + unsigned int, int); void (*csk_release_offload_resources)(struct cxgbi_sock *); int (*csk_rx_pdu_ready)(struct cxgbi_sock *, struct sk_buff *); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index e2420a810e99..c92b3822c408 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2507,6 +2507,12 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) sha->sas_port[i] = &hisi_hba->port[i].sas_port; } + if (hisi_hba->prot_mask) { + dev_info(dev, "Registering for DIF/DIX prot_mask=0x%x\n", + prot_mask); + scsi_host_set_prot(hisi_hba->shost, prot_mask); + } + rc = scsi_add_host(shost, dev); if (rc) goto err_out_ha; @@ -2519,12 +2525,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) goto err_out_register_ha; - if (hisi_hba->prot_mask) { - dev_info(dev, "Registering for DIF/DIX prot_mask=0x%x\n", - prot_mask); - scsi_host_set_prot(hisi_hba->shost, prot_mask); - } - scsi_scan_host(shost); return 0; diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index 68b90c4f79a3..1727d0c71b12 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -576,6 +576,13 @@ static struct isci_host *isci_host_alloc(struct pci_dev *pdev, int id) shost->max_lun = ~0; shost->max_cmd_len = MAX_COMMAND_SIZE; + /* turn on DIF support */ + scsi_host_set_prot(shost, + SHOST_DIF_TYPE1_PROTECTION | + SHOST_DIF_TYPE2_PROTECTION | + SHOST_DIF_TYPE3_PROTECTION); + scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC); + err = scsi_add_host(shost, &pdev->dev); if (err) goto err_shost; @@ -663,13 +670,6 @@ static int isci_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_host_alloc; } pci_info->hosts[i] = h; - - /* turn on DIF support */ - scsi_host_set_prot(to_shost(h), - SHOST_DIF_TYPE1_PROTECTION | - SHOST_DIF_TYPE2_PROTECTION | - SHOST_DIF_TYPE3_PROTECTION); - scsi_host_set_guard(to_shost(h), SHOST_DIX_GUARD_CRC); } err = isci_setup_interrupts(pdev); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 4c66b19e6199..8c9f79042228 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -297,7 +297,8 @@ lpfc_nvme_localport_delete(struct nvme_fc_local_port *localport) lport); /* release any threads waiting for the unreg to complete */ - complete(&lport->lport_unreg_done); + if (lport->vport->localport) + complete(lport->lport_unreg_cmp); } /* lpfc_nvme_remoteport_delete @@ -2545,7 +2546,8 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) */ void lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, - struct lpfc_nvme_lport *lport) + struct lpfc_nvme_lport *lport, + struct completion *lport_unreg_cmp) { #if (IS_ENABLED(CONFIG_NVME_FC)) u32 wait_tmo; @@ -2557,8 +2559,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, */ wait_tmo = msecs_to_jiffies(LPFC_NVME_WAIT_TMO * 1000); while (true) { - ret = wait_for_completion_timeout(&lport->lport_unreg_done, - wait_tmo); + ret = wait_for_completion_timeout(lport_unreg_cmp, wait_tmo); if (unlikely(!ret)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, "6176 Lport %p Localport %p wait " @@ -2592,12 +2593,12 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) struct lpfc_nvme_lport *lport; struct lpfc_nvme_ctrl_stat *cstat; int ret; + DECLARE_COMPLETION_ONSTACK(lport_unreg_cmp); if (vport->nvmei_support == 0) return; localport = vport->localport; - vport->localport = NULL; lport = (struct lpfc_nvme_lport *)localport->private; cstat = lport->cstat; @@ -2608,13 +2609,14 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) /* lport's rport list is clear. Unregister * lport and release resources. */ - init_completion(&lport->lport_unreg_done); + lport->lport_unreg_cmp = &lport_unreg_cmp; ret = nvme_fc_unregister_localport(localport); /* Wait for completion. This either blocks * indefinitely or succeeds */ - lpfc_nvme_lport_unreg_wait(vport, lport); + lpfc_nvme_lport_unreg_wait(vport, lport, &lport_unreg_cmp); + vport->localport = NULL; kfree(cstat); /* Regardless of the unregister upcall response, clear diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index cfd4719be25c..b234d0298994 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -50,7 +50,7 @@ struct lpfc_nvme_ctrl_stat { /* Declare nvme-based local and remote port definitions. */ struct lpfc_nvme_lport { struct lpfc_vport *vport; - struct completion lport_unreg_done; + struct completion *lport_unreg_cmp; /* Add stats counters here */ struct lpfc_nvme_ctrl_stat *cstat; atomic_t fc4NvmeLsRequests; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 6245f442d784..95fee83090eb 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1003,7 +1003,8 @@ lpfc_nvmet_targetport_delete(struct nvmet_fc_target_port *targetport) struct lpfc_nvmet_tgtport *tport = targetport->private; /* release any threads waiting for the unreg to complete */ - complete(&tport->tport_unreg_done); + if (tport->phba->targetport) + complete(tport->tport_unreg_cmp); } static void @@ -1692,6 +1693,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) struct lpfc_nvmet_tgtport *tgtp; struct lpfc_queue *wq; uint32_t qidx; + DECLARE_COMPLETION_ONSTACK(tport_unreg_cmp); if (phba->nvmet_support == 0) return; @@ -1701,9 +1703,9 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) wq = phba->sli4_hba.nvme_wq[qidx]; lpfc_nvmet_wqfull_flush(phba, wq, NULL); } - init_completion(&tgtp->tport_unreg_done); + tgtp->tport_unreg_cmp = &tport_unreg_cmp; nvmet_fc_unregister_targetport(phba->targetport); - wait_for_completion_timeout(&tgtp->tport_unreg_done, 5); + wait_for_completion_timeout(&tport_unreg_cmp, 5); lpfc_nvmet_cleanup_io_context(phba); } phba->targetport = NULL; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index 1aaff63f1f41..0ec1082ce7ef 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -34,7 +34,7 @@ /* Used for NVME Target */ struct lpfc_nvmet_tgtport { struct lpfc_hba *phba; - struct completion tport_unreg_done; + struct completion *tport_unreg_cmp; /* Stats counters - lpfc_nvmet_unsol_ls_buffer */ atomic_t rcv_ls_req_in; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 12fd74761ae0..2242e9b3ca12 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -9407,6 +9407,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, cmnd = CMD_XMIT_SEQUENCE64_CR; if (phba->link_flag & LS_LOOPBACK_MODE) bf_set(wqe_xo, &wqe->xmit_sequence.wge_ctl, 1); + /* fall through */ case CMD_XMIT_SEQUENCE64_CR: /* word3 iocb=io_tag32 wqe=reserved */ wqe->xmit_sequence.rsvd3 = 0; @@ -13528,6 +13529,7 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) case FC_STATUS_RQ_BUF_LEN_EXCEEDED: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "2537 Receive Frame Truncated!!\n"); + /* fall through */ case FC_STATUS_RQ_SUCCESS: spin_lock_irqsave(&phba->hbalock, iflags); lpfc_sli4_rq_release(hrq, drq); @@ -13937,7 +13939,7 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, case FC_STATUS_RQ_BUF_LEN_EXCEEDED: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "6126 Receive Frame Truncated!!\n"); - /* Drop thru */ + /* fall through */ case FC_STATUS_RQ_SUCCESS: spin_lock_irqsave(&phba->hbalock, iflags); lpfc_sli4_rq_release(hrq, drq); @@ -14849,7 +14851,7 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax) eq->entry_count); if (eq->entry_count < 256) return -EINVAL; - /* otherwise default to smallest count (drop through) */ + /* fall through - otherwise default to smallest count */ case 256: bf_set(lpfc_eq_context_count, &eq_create->u.request.context, LPFC_EQ_CNT_256); @@ -14980,7 +14982,7 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq, LPFC_CQ_CNT_WORD7); break; } - /* Fall Thru */ + /* fall through */ default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0361 Unsupported CQ count: " @@ -14991,7 +14993,7 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq, status = -EINVAL; goto out; } - /* otherwise default to smallest count (drop through) */ + /* fall through - otherwise default to smallest count */ case 256: bf_set(lpfc_cq_context_count, &cq_create->u.request.context, LPFC_CQ_CNT_256); @@ -15151,7 +15153,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp, LPFC_CQ_CNT_WORD7); break; } - /* Fall Thru */ + /* fall through */ default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "3118 Bad CQ count. (%d)\n", @@ -15160,7 +15162,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp, status = -EINVAL; goto out; } - /* otherwise default to smallest (drop thru) */ + /* fall through - otherwise default to smallest */ case 256: bf_set(lpfc_mbx_cq_create_set_cqe_cnt, &cq_set->u.request, LPFC_CQ_CNT_256); @@ -15432,7 +15434,7 @@ lpfc_mq_create(struct lpfc_hba *phba, struct lpfc_queue *mq, status = -EINVAL; goto out; } - /* otherwise default to smallest count (drop through) */ + /* fall through - otherwise default to smallest count */ case 16: bf_set(lpfc_mq_context_ring_size, &mq_create_ext->u.request.context, @@ -15851,7 +15853,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, status = -EINVAL; goto out; } - /* otherwise default to smallest count (drop through) */ + /* fall through - otherwise default to smallest count */ case 512: bf_set(lpfc_rq_context_rqe_count, &rq_create->u.request.context, @@ -15988,7 +15990,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, status = -EINVAL; goto out; } - /* otherwise default to smallest count (drop through) */ + /* fall through - otherwise default to smallest count */ case 512: bf_set(lpfc_rq_context_rqe_count, &rq_create->u.request.context, diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 7eaa400f6328..fcbff83c0097 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -6236,7 +6236,7 @@ megasas_set_dma_mask(struct megasas_instance *instance) instance->consistent_mask_64bit = true; dev_info(&pdev->dev, "%s bit DMA mask and %s bit consistent mask\n", - ((*pdev->dev.dma_mask == DMA_BIT_MASK(64)) ? "63" : "32"), + ((*pdev->dev.dma_mask == DMA_BIT_MASK(63)) ? "63" : "32"), (instance->consistent_mask_64bit ? "63" : "32")); return 0; diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index a9a25f0eaf6f..647f48a28f85 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -175,7 +175,8 @@ megasas_clear_intr_fusion(struct megasas_instance *instance) /* * Check if it is our interrupt */ - status = readl(®s->outbound_intr_status); + status = megasas_readl(instance, + ®s->outbound_intr_status); if (status & 1) { writel(status, ®s->outbound_intr_status); diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 4c5a3d23e010..084f2fcced0a 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -657,7 +657,7 @@ static int pm8001_dev_found_notify(struct domain_device *dev) if (dev->dev_type == SAS_SATA_DEV) { pm8001_device->attached_phy = dev->rphy->identify.phy_identifier; - flag = 1; /* directly sata*/ + flag = 1; /* directly sata */ } } /*register this device to HBA*/ PM8001_DISC_DBG(pm8001_ha, pm8001_printk("Found device\n")); diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 4da660c1c431..6d6d6013e35b 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -953,6 +953,7 @@ static int qedi_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) qedi_ep = ep->dd_data; if (qedi_ep->state == EP_STATE_IDLE || + qedi_ep->state == EP_STATE_OFLDCONN_NONE || qedi_ep->state == EP_STATE_OFLDCONN_FAILED) return -1; @@ -1035,6 +1036,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) switch (qedi_ep->state) { case EP_STATE_OFLDCONN_START: + case EP_STATE_OFLDCONN_NONE: goto ep_release_conn; case EP_STATE_OFLDCONN_FAILED: break; @@ -1225,6 +1227,7 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) if (!is_valid_ether_addr(&path_data->mac_addr[0])) { QEDI_NOTICE(&qedi->dbg_ctx, "dst mac NOT VALID\n"); + qedi_ep->state = EP_STATE_OFLDCONN_NONE; ret = -EIO; goto set_path_exit; } diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h index 11260776212f..892d70d54553 100644 --- a/drivers/scsi/qedi/qedi_iscsi.h +++ b/drivers/scsi/qedi/qedi_iscsi.h @@ -59,6 +59,7 @@ enum { EP_STATE_OFLDCONN_FAILED = 0x2000, EP_STATE_CONNECT_FAILED = 0x4000, EP_STATE_DISCONN_TIMEDOUT = 0x8000, + EP_STATE_OFLDCONN_NONE = 0x10000, }; struct qedi_conn; diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index a414f51302b7..6856dfdfa473 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -4248,7 +4248,7 @@ qla1280_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ha->devnum = devnum; /* specifies microcode load address */ #ifdef QLA_64BIT_PTR - if (dma_set_mask(&ha->pdev->dev, DMA_BIT_MASK(64))) { + if (dma_set_mask_and_coherent(&ha->pdev->dev, DMA_BIT_MASK(64))) { if (dma_set_mask(&ha->pdev->dev, DMA_BIT_MASK(32))) { printk(KERN_WARNING "scsi(%li): Unable to set a " "suitable DMA mask - aborting\n", ha->host_no); diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 26b93c563f92..d1fc4958222a 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -4394,6 +4394,8 @@ typedef struct scsi_qla_host { uint16_t n2n_id; struct list_head gpnid_list; struct fab_scan scan; + + unsigned int irq_offset; } scsi_qla_host_t; struct qla27xx_image_status { diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 30d3090842f8..8507c43b918c 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3446,6 +3446,7 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) "Adjusted Max no of queues pairs: %d.\n", ha->max_qpairs); } } + vha->irq_offset = desc.pre_vectors; ha->msix_entries = kcalloc(ha->msix_count, sizeof(struct qla_msix_entry), GFP_KERNEL); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index ea69dafc9774..c6ef83d0d99b 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -6939,7 +6939,7 @@ static int qla2xxx_map_queues(struct Scsi_Host *shost) if (USER_CTRL_IRQ(vha->hw)) rc = blk_mq_map_queues(qmap); else - rc = blk_mq_pci_map_queues(qmap, vha->hw->pdev, 0); + rc = blk_mq_pci_map_queues(qmap, vha->hw->pdev, vha->irq_offset); return rc; } diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index cfdfcda28072..a77bfb224248 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -7232,6 +7232,8 @@ static int qla4xxx_sysfs_ddb_tgt_create(struct scsi_qla_host *ha, rc = qla4xxx_copy_from_fwddb_param(fnode_sess, fnode_conn, fw_ddb_entry); + if (rc) + goto free_sess; ql4_printk(KERN_INFO, ha, "%s: sysfs entry %s created\n", __func__, fnode_sess->dev.kobj.name); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b13cc9288ba0..6d65ac584eba 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1842,8 +1842,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) blk_queue_segment_boundary(q, shost->dma_boundary); dma_set_seg_boundary(dev, shost->dma_boundary); - blk_queue_max_segment_size(q, - min(shost->max_segment_size, dma_get_max_seg_size(dev))); + blk_queue_max_segment_size(q, shost->max_segment_size); + dma_set_max_seg_size(dev, shost->max_segment_size); /* * Set a reasonable default alignment: The larger of 32-byte (dword), diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c index a2b4179bfdf7..7639df91b110 100644 --- a/drivers/scsi/scsi_pm.c +++ b/drivers/scsi/scsi_pm.c @@ -80,8 +80,22 @@ static int scsi_dev_type_resume(struct device *dev, if (err == 0) { pm_runtime_disable(dev); - pm_runtime_set_active(dev); + err = pm_runtime_set_active(dev); pm_runtime_enable(dev); + + /* + * Forcibly set runtime PM status of request queue to "active" + * to make sure we can again get requests from the queue + * (see also blk_pm_peek_request()). + * + * The resume hook will correct runtime PM status of the disk. + */ + if (!err && scsi_is_sdev_device(dev)) { + struct scsi_device *sdev = to_scsi_device(dev); + + if (sdev->request_queue->dev) + blk_set_runtime_active(sdev->request_queue); + } } return err; @@ -140,16 +154,6 @@ static int scsi_bus_resume_common(struct device *dev, else fn = NULL; - /* - * Forcibly set runtime PM status of request queue to "active" to - * make sure we can again get requests from the queue (see also - * blk_pm_peek_request()). - * - * The resume hook will correct runtime PM status of the disk. - */ - if (scsi_is_sdev_device(dev) && pm_runtime_suspended(dev)) - blk_set_runtime_active(to_scsi_device(dev)->request_queue); - if (fn) { async_schedule_domain(fn, dev, &scsi_sd_pm_domain); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a1a44f52e0e8..b2da8a00ec33 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -206,6 +206,12 @@ cache_type_store(struct device *dev, struct device_attribute *attr, sp = buffer_data[0] & 0x80 ? 1 : 0; buffer_data[0] &= ~0x80; + /* + * Ensure WP, DPOFUA, and RESERVED fields are cleared in + * received mode parameter buffer before doing MODE SELECT. + */ + data.device_specific = 0; + if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT, SD_MAX_RETRIES, &data, &sshdr)) { if (scsi_sense_valid(&sshdr)) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 7bde6c809442..f564af8949e8 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -323,7 +323,7 @@ static inline void pqi_device_remove_start(struct pqi_scsi_dev *device) static inline bool pqi_device_in_remove(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device) { - return device->in_remove & !ctrl_info->in_shutdown; + return device->in_remove && !ctrl_info->in_shutdown; } static inline void pqi_schedule_rescan_worker_with_delay( diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index dd65fea07687..6d176815e6ce 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -195,7 +195,7 @@ enum ufs_desc_def_size { QUERY_DESC_CONFIGURATION_DEF_SIZE = 0x90, QUERY_DESC_UNIT_DEF_SIZE = 0x23, QUERY_DESC_INTERCONNECT_DEF_SIZE = 0x06, - QUERY_DESC_GEOMETRY_DEF_SIZE = 0x44, + QUERY_DESC_GEOMETRY_DEF_SIZE = 0x48, QUERY_DESC_POWER_DEF_SIZE = 0x62, QUERY_DESC_HEALTH_DEF_SIZE = 0x25, }; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 9ba7671b84f8..2ddf24466a62 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -108,13 +108,19 @@ int ufshcd_dump_regs(struct ufs_hba *hba, size_t offset, size_t len, const char *prefix) { - u8 *regs; + u32 *regs; + size_t pos; + + if (offset % 4 != 0 || len % 4 != 0) /* keep readl happy */ + return -EINVAL; regs = kzalloc(len, GFP_KERNEL); if (!regs) return -ENOMEM; - memcpy_fromio(regs, hba->mmio_base + offset, len); + for (pos = 0; pos < len; pos += 4) + regs[pos / 4] = ufshcd_readl(hba, offset + pos); + ufshcd_hex_dump(prefix, regs, len); kfree(regs); @@ -8001,6 +8007,8 @@ out: trace_ufshcd_system_resume(dev_name(hba->dev), ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); + if (!ret) + hba->is_sys_suspended = false; return ret; } EXPORT_SYMBOL(ufshcd_system_resume); diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index a0802de8c3a1..6f5afab7c1a1 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -248,10 +248,10 @@ static void ion_dma_buf_detatch(struct dma_buf *dmabuf, struct ion_dma_buf_attachment *a = attachment->priv; struct ion_buffer *buffer = dmabuf->priv; - free_duped_table(a->table); mutex_lock(&buffer->lock); list_del(&a->list); mutex_unlock(&buffer->lock); + free_duped_table(a->table); kfree(a); } diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 28cbd6b3d26c..dfee6985efa6 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -35,6 +35,7 @@ static const struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */ {USB_DEVICE(0x2001, 0x3310)}, /* Dlink DWA-123 REV D1 */ {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ + {USB_DEVICE(0x2001, 0x331B)}, /* D-Link DWA-121 rev B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ diff --git a/drivers/staging/rtl8723bs/include/ieee80211.h b/drivers/staging/rtl8723bs/include/ieee80211.h index bcc8dfa8e672..9efb4dcb9d3a 100644 --- a/drivers/staging/rtl8723bs/include/ieee80211.h +++ b/drivers/staging/rtl8723bs/include/ieee80211.h @@ -850,18 +850,18 @@ enum ieee80211_state { #define IP_FMT "%pI4" #define IP_ARG(x) (x) -extern __inline int is_multicast_mac_addr(const u8 *addr) +static inline int is_multicast_mac_addr(const u8 *addr) { return ((addr[0] != 0xff) && (0x01 & addr[0])); } -extern __inline int is_broadcast_mac_addr(const u8 *addr) +static inline int is_broadcast_mac_addr(const u8 *addr) { return ((addr[0] == 0xff) && (addr[1] == 0xff) && (addr[2] == 0xff) && \ (addr[3] == 0xff) && (addr[4] == 0xff) && (addr[5] == 0xff)); } -extern __inline int is_zero_mac_addr(const u8 *addr) +static inline int is_zero_mac_addr(const u8 *addr) { return ((addr[0] == 0x00) && (addr[1] == 0x00) && (addr[2] == 0x00) && \ (addr[3] == 0x00) && (addr[4] == 0x00) && (addr[5] == 0x00)); diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c index 9e17ec651bde..53f5a1cb4636 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_core.c @@ -446,6 +446,7 @@ remote_event_wait(wait_queue_head_t *wq, struct remote_event *event) static inline void remote_event_signal_local(wait_queue_head_t *wq, struct remote_event *event) { + event->fired = 1; event->armed = 0; wake_up_all(wq); } diff --git a/drivers/staging/wilc1000/host_interface.c b/drivers/staging/wilc1000/host_interface.c index 70c854d939ce..3d0badc34825 100644 --- a/drivers/staging/wilc1000/host_interface.c +++ b/drivers/staging/wilc1000/host_interface.c @@ -36,7 +36,7 @@ struct wilc_op_mode { struct wilc_reg_frame { bool reg; u8 reg_id; - __le32 frame_type; + __le16 frame_type; } __packed; struct wilc_drv_handler { @@ -1744,7 +1744,6 @@ int wilc_add_rx_gtk(struct wilc_vif *vif, const u8 *rx_gtk, u8 gtk_key_len, result = wilc_send_config_pkt(vif, WILC_SET_CFG, wid_list, ARRAY_SIZE(wid_list), wilc_get_vif_idx(vif)); - kfree(gtk_key); } else if (mode == WILC_STATION_MODE) { struct wid wid; @@ -1754,9 +1753,9 @@ int wilc_add_rx_gtk(struct wilc_vif *vif, const u8 *rx_gtk, u8 gtk_key_len, wid.val = (u8 *)gtk_key; result = wilc_send_config_pkt(vif, WILC_SET_CFG, &wid, 1, wilc_get_vif_idx(vif)); - kfree(gtk_key); } + kfree(gtk_key); return result; } diff --git a/drivers/staging/wilc1000/wilc_wlan.c b/drivers/staging/wilc1000/wilc_wlan.c index 3c5e9e030cad..489e5a5038f8 100644 --- a/drivers/staging/wilc1000/wilc_wlan.c +++ b/drivers/staging/wilc1000/wilc_wlan.c @@ -1252,21 +1252,22 @@ static u32 init_chip(struct net_device *dev) ret = wilc->hif_func->hif_read_reg(wilc, 0x1118, ®); if (!ret) { netdev_err(dev, "fail read reg 0x1118\n"); - return ret; + goto release; } reg |= BIT(0); ret = wilc->hif_func->hif_write_reg(wilc, 0x1118, reg); if (!ret) { netdev_err(dev, "fail write reg 0x1118\n"); - return ret; + goto release; } ret = wilc->hif_func->hif_write_reg(wilc, 0xc0000, 0x71); if (!ret) { netdev_err(dev, "fail write reg 0xc0000\n"); - return ret; + goto release; } } +release: release_bus(wilc, WILC_BUS_RELEASE_ONLY); return ret; diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 984941e036c8..bd15a564fe24 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -714,7 +714,7 @@ static int __init iscsi_target_init_module(void) sizeof(struct iscsi_queue_req), __alignof__(struct iscsi_queue_req), 0, NULL); if (!lio_qr_cache) { - pr_err("nable to kmem_cache_create() for" + pr_err("Unable to kmem_cache_create() for" " lio_qr_cache\n"); goto bitmap_out; } diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 1e6d24943565..5831e0eecea1 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -148,7 +148,7 @@ struct tcmu_dev { size_t ring_size; struct mutex cmdr_lock; - struct list_head cmdr_queue; + struct list_head qfull_queue; uint32_t dbi_max; uint32_t dbi_thresh; @@ -159,6 +159,7 @@ struct tcmu_dev { struct timer_list cmd_timer; unsigned int cmd_time_out; + struct list_head inflight_queue; struct timer_list qfull_timer; int qfull_time_out; @@ -179,7 +180,7 @@ struct tcmu_dev { struct tcmu_cmd { struct se_cmd *se_cmd; struct tcmu_dev *tcmu_dev; - struct list_head cmdr_queue_entry; + struct list_head queue_entry; uint16_t cmd_id; @@ -192,6 +193,7 @@ struct tcmu_cmd { unsigned long deadline; #define TCMU_CMD_BIT_EXPIRED 0 +#define TCMU_CMD_BIT_INFLIGHT 1 unsigned long flags; }; /* @@ -586,7 +588,7 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) if (!tcmu_cmd) return NULL; - INIT_LIST_HEAD(&tcmu_cmd->cmdr_queue_entry); + INIT_LIST_HEAD(&tcmu_cmd->queue_entry); tcmu_cmd->se_cmd = se_cmd; tcmu_cmd->tcmu_dev = udev; @@ -915,11 +917,13 @@ setup_timer: return 0; tcmu_cmd->deadline = round_jiffies_up(jiffies + msecs_to_jiffies(tmo)); - mod_timer(timer, tcmu_cmd->deadline); + if (!timer_pending(timer)) + mod_timer(timer, tcmu_cmd->deadline); + return 0; } -static int add_to_cmdr_queue(struct tcmu_cmd *tcmu_cmd) +static int add_to_qfull_queue(struct tcmu_cmd *tcmu_cmd) { struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; unsigned int tmo; @@ -942,7 +946,7 @@ static int add_to_cmdr_queue(struct tcmu_cmd *tcmu_cmd) if (ret) return ret; - list_add_tail(&tcmu_cmd->cmdr_queue_entry, &udev->cmdr_queue); + list_add_tail(&tcmu_cmd->queue_entry, &udev->qfull_queue); pr_debug("adding cmd %u on dev %s to ring space wait queue\n", tcmu_cmd->cmd_id, udev->name); return 0; @@ -999,7 +1003,7 @@ static int queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, sense_reason_t *scsi_err) base_command_size = tcmu_cmd_get_base_cmd_size(tcmu_cmd->dbi_cnt); command_size = tcmu_cmd_get_cmd_size(tcmu_cmd, base_command_size); - if (!list_empty(&udev->cmdr_queue)) + if (!list_empty(&udev->qfull_queue)) goto queue; mb = udev->mb_addr; @@ -1096,13 +1100,16 @@ static int queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, sense_reason_t *scsi_err) UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size); tcmu_flush_dcache_range(mb, sizeof(*mb)); + list_add_tail(&tcmu_cmd->queue_entry, &udev->inflight_queue); + set_bit(TCMU_CMD_BIT_INFLIGHT, &tcmu_cmd->flags); + /* TODO: only if FLUSH and FUA? */ uio_event_notify(&udev->uio_info); return 0; queue: - if (add_to_cmdr_queue(tcmu_cmd)) { + if (add_to_qfull_queue(tcmu_cmd)) { *scsi_err = TCM_OUT_OF_RESOURCES; return -1; } @@ -1145,6 +1152,8 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) goto out; + list_del_init(&cmd->queue_entry); + tcmu_cmd_reset_dbi_cur(cmd); if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) { @@ -1194,9 +1203,29 @@ out: tcmu_free_cmd(cmd); } +static void tcmu_set_next_deadline(struct list_head *queue, + struct timer_list *timer) +{ + struct tcmu_cmd *tcmu_cmd, *tmp_cmd; + unsigned long deadline = 0; + + list_for_each_entry_safe(tcmu_cmd, tmp_cmd, queue, queue_entry) { + if (!time_after(jiffies, tcmu_cmd->deadline)) { + deadline = tcmu_cmd->deadline; + break; + } + } + + if (deadline) + mod_timer(timer, deadline); + else + del_timer(timer); +} + static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) { struct tcmu_mailbox *mb; + struct tcmu_cmd *cmd; int handled = 0; if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) { @@ -1210,7 +1239,6 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) while (udev->cmdr_last_cleaned != READ_ONCE(mb->cmd_tail)) { struct tcmu_cmd_entry *entry = (void *) mb + CMDR_OFF + udev->cmdr_last_cleaned; - struct tcmu_cmd *cmd; tcmu_flush_dcache_range(entry, sizeof(*entry)); @@ -1243,7 +1271,7 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) /* no more pending commands */ del_timer(&udev->cmd_timer); - if (list_empty(&udev->cmdr_queue)) { + if (list_empty(&udev->qfull_queue)) { /* * no more pending or waiting commands so try to * reclaim blocks if needed. @@ -1252,6 +1280,8 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) tcmu_global_max_blocks) schedule_delayed_work(&tcmu_unmap_work, 0); } + } else if (udev->cmd_time_out) { + tcmu_set_next_deadline(&udev->inflight_queue, &udev->cmd_timer); } return handled; @@ -1271,7 +1301,7 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) if (!time_after(jiffies, cmd->deadline)) return 0; - is_running = list_empty(&cmd->cmdr_queue_entry); + is_running = test_bit(TCMU_CMD_BIT_INFLIGHT, &cmd->flags); se_cmd = cmd->se_cmd; if (is_running) { @@ -1287,9 +1317,9 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) * target_complete_cmd will translate this to LUN COMM FAILURE */ scsi_status = SAM_STAT_CHECK_CONDITION; + list_del_init(&cmd->queue_entry); } else { - list_del_init(&cmd->cmdr_queue_entry); - + list_del_init(&cmd->queue_entry); idr_remove(&udev->commands, id); tcmu_free_cmd(cmd); scsi_status = SAM_STAT_TASK_SET_FULL; @@ -1372,7 +1402,8 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&udev->node); INIT_LIST_HEAD(&udev->timedout_entry); - INIT_LIST_HEAD(&udev->cmdr_queue); + INIT_LIST_HEAD(&udev->qfull_queue); + INIT_LIST_HEAD(&udev->inflight_queue); idr_init(&udev->commands); timer_setup(&udev->qfull_timer, tcmu_qfull_timedout, 0); @@ -1383,7 +1414,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) return &udev->se_dev; } -static bool run_cmdr_queue(struct tcmu_dev *udev, bool fail) +static bool run_qfull_queue(struct tcmu_dev *udev, bool fail) { struct tcmu_cmd *tcmu_cmd, *tmp_cmd; LIST_HEAD(cmds); @@ -1391,15 +1422,15 @@ static bool run_cmdr_queue(struct tcmu_dev *udev, bool fail) sense_reason_t scsi_ret; int ret; - if (list_empty(&udev->cmdr_queue)) + if (list_empty(&udev->qfull_queue)) return true; pr_debug("running %s's cmdr queue forcefail %d\n", udev->name, fail); - list_splice_init(&udev->cmdr_queue, &cmds); + list_splice_init(&udev->qfull_queue, &cmds); - list_for_each_entry_safe(tcmu_cmd, tmp_cmd, &cmds, cmdr_queue_entry) { - list_del_init(&tcmu_cmd->cmdr_queue_entry); + list_for_each_entry_safe(tcmu_cmd, tmp_cmd, &cmds, queue_entry) { + list_del_init(&tcmu_cmd->queue_entry); pr_debug("removing cmd %u on dev %s from queue\n", tcmu_cmd->cmd_id, udev->name); @@ -1437,14 +1468,13 @@ static bool run_cmdr_queue(struct tcmu_dev *udev, bool fail) * cmd was requeued, so just put all cmds back in * the queue */ - list_splice_tail(&cmds, &udev->cmdr_queue); + list_splice_tail(&cmds, &udev->qfull_queue); drained = false; - goto done; + break; } } - if (list_empty(&udev->cmdr_queue)) - del_timer(&udev->qfull_timer); -done: + + tcmu_set_next_deadline(&udev->qfull_queue, &udev->qfull_timer); return drained; } @@ -1454,7 +1484,7 @@ static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on) mutex_lock(&udev->cmdr_lock); tcmu_handle_completions(udev); - run_cmdr_queue(udev, false); + run_qfull_queue(udev, false); mutex_unlock(&udev->cmdr_lock); return 0; @@ -1982,7 +2012,7 @@ static void tcmu_block_dev(struct tcmu_dev *udev) /* complete IO that has executed successfully */ tcmu_handle_completions(udev); /* fail IO waiting to be queued */ - run_cmdr_queue(udev, true); + run_qfull_queue(udev, true); unlock: mutex_unlock(&udev->cmdr_lock); @@ -1997,7 +2027,7 @@ static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level) mutex_lock(&udev->cmdr_lock); idr_for_each_entry(&udev->commands, cmd, i) { - if (!list_empty(&cmd->cmdr_queue_entry)) + if (!test_bit(TCMU_CMD_BIT_INFLIGHT, &cmd->flags)) continue; pr_debug("removing cmd %u on dev %s from ring (is expired %d)\n", @@ -2006,6 +2036,7 @@ static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level) idr_remove(&udev->commands, i); if (!test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { + list_del_init(&cmd->queue_entry); if (err_level == 1) { /* * Userspace was not able to start the @@ -2666,6 +2697,10 @@ static void check_timedout_devices(void) mutex_lock(&udev->cmdr_lock); idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL); + + tcmu_set_next_deadline(&udev->inflight_queue, &udev->cmd_timer); + tcmu_set_next_deadline(&udev->qfull_queue, &udev->qfull_timer); + mutex_unlock(&udev->cmdr_lock); spin_lock_bh(&timed_out_udevs_lock); diff --git a/drivers/thermal/intel/int340x_thermal/Kconfig b/drivers/thermal/intel/int340x_thermal/Kconfig index 0582bd12a239..0ca908d12750 100644 --- a/drivers/thermal/intel/int340x_thermal/Kconfig +++ b/drivers/thermal/intel/int340x_thermal/Kconfig @@ -4,7 +4,7 @@ config INT340X_THERMAL tristate "ACPI INT340X thermal drivers" - depends on X86 && ACPI + depends on X86 && ACPI && PCI select THERMAL_GOV_USER_SPACE select ACPI_THERMAL_REL select ACPI_FAN diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c index 284cf2c5a8fd..8e1cf4d789be 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c @@ -84,7 +84,12 @@ static ssize_t power_limit_##index##_##suffix##_show(struct device *dev, \ struct pci_dev *pci_dev; \ struct platform_device *pdev; \ struct proc_thermal_device *proc_dev; \ -\ + \ + if (proc_thermal_emum_mode == PROC_THERMAL_NONE) { \ + dev_warn(dev, "Attempted to get power limit before device was initialized!\n"); \ + return 0; \ + } \ + \ if (proc_thermal_emum_mode == PROC_THERMAL_PLATFORM_DEV) { \ pdev = to_platform_device(dev); \ proc_dev = platform_get_drvdata(pdev); \ @@ -298,11 +303,6 @@ static int proc_thermal_add(struct device *dev, *priv = proc_priv; ret = proc_thermal_read_ppcc(proc_priv); - if (!ret) { - ret = sysfs_create_group(&dev->kobj, - &power_limit_attribute_group); - - } if (ret) return ret; @@ -316,8 +316,7 @@ static int proc_thermal_add(struct device *dev, proc_priv->int340x_zone = int340x_thermal_zone_add(adev, ops); if (IS_ERR(proc_priv->int340x_zone)) { - ret = PTR_ERR(proc_priv->int340x_zone); - goto remove_group; + return PTR_ERR(proc_priv->int340x_zone); } else ret = 0; @@ -331,9 +330,6 @@ static int proc_thermal_add(struct device *dev, remove_zone: int340x_thermal_zone_remove(proc_priv->int340x_zone); -remove_group: - sysfs_remove_group(&proc_priv->dev->kobj, - &power_limit_attribute_group); return ret; } @@ -364,7 +360,10 @@ static int int3401_add(struct platform_device *pdev) platform_set_drvdata(pdev, proc_priv); proc_thermal_emum_mode = PROC_THERMAL_PLATFORM_DEV; - return 0; + dev_info(&pdev->dev, "Creating sysfs group for PROC_THERMAL_PLATFORM_DEV\n"); + + return sysfs_create_group(&pdev->dev.kobj, + &power_limit_attribute_group); } static int int3401_remove(struct platform_device *pdev) @@ -423,7 +422,7 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, proc_priv->soc_dts = intel_soc_dts_iosf_init( INTEL_SOC_DTS_INTERRUPT_MSI, 2, 0); - if (proc_priv->soc_dts && pdev->irq) { + if (!IS_ERR(proc_priv->soc_dts) && pdev->irq) { ret = pci_enable_msi(pdev); if (!ret) { ret = request_threaded_irq(pdev->irq, NULL, @@ -441,7 +440,10 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, dev_err(&pdev->dev, "No auxiliary DTSs enabled\n"); } - return 0; + dev_info(&pdev->dev, "Creating sysfs group for PROC_THERMAL_PCI\n"); + + return sysfs_create_group(&pdev->dev.kobj, + &power_limit_attribute_group); } static void proc_thermal_pci_remove(struct pci_dev *pdev) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 4164414d4c64..8bdf42bc8fc8 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -597,6 +597,7 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, /* too large for caller's buffer */ ret = -EOVERFLOW; } else { + __set_current_state(TASK_RUNNING); if (copy_to_user(buf, rbuf->buf, rbuf->count)) ret = -EFAULT; else diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 189ab1212d9a..e441221e04b9 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -1070,15 +1070,16 @@ int serial8250_register_8250_port(struct uart_8250_port *up) ret = 0; } - } - /* Initialise interrupt backoff work if required */ - if (up->overrun_backoff_time_ms > 0) { - uart->overrun_backoff_time_ms = up->overrun_backoff_time_ms; - INIT_DELAYED_WORK(&uart->overrun_backoff, - serial_8250_overrun_backoff_work); - } else { - uart->overrun_backoff_time_ms = 0; + /* Initialise interrupt backoff work if required */ + if (up->overrun_backoff_time_ms > 0) { + uart->overrun_backoff_time_ms = + up->overrun_backoff_time_ms; + INIT_DELAYED_WORK(&uart->overrun_backoff, + serial_8250_overrun_backoff_work); + } else { + uart->overrun_backoff_time_ms = 0; + } } mutex_unlock(&serial_mutex); diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 241a48e5052c..debdd1b9e01a 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1697,7 +1697,7 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios, } /* ask the core to calculate the divisor */ - baud = uart_get_baud_rate(port, termios, old, 50, port->uartclk / 16); + baud = uart_get_baud_rate(port, termios, old, 50, port->uartclk / 4); spin_lock_irqsave(&sport->port.lock, flags); diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index a72d6d9fb983..38016609c7fa 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -225,7 +225,7 @@ static unsigned int qcom_geni_serial_get_mctrl(struct uart_port *uport) unsigned int mctrl = TIOCM_DSR | TIOCM_CAR; u32 geni_ios; - if (uart_console(uport) || !uart_cts_enabled(uport)) { + if (uart_console(uport)) { mctrl |= TIOCM_CTS; } else { geni_ios = readl_relaxed(uport->membase + SE_GENI_IOS); @@ -241,7 +241,7 @@ static void qcom_geni_serial_set_mctrl(struct uart_port *uport, { u32 uart_manual_rfr = 0; - if (uart_console(uport) || !uart_cts_enabled(uport)) + if (uart_console(uport)) return; if (!(mctrl & TIOCM_RTS)) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index d4cca5bdaf1c..5c01bb6d1c24 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -550,10 +550,12 @@ static int uart_put_char(struct tty_struct *tty, unsigned char c) int ret = 0; circ = &state->xmit; - if (!circ->buf) - return 0; - port = uart_port_lock(state, flags); + if (!circ->buf) { + uart_port_unlock(port, flags); + return 0; + } + if (port && uart_circ_chars_free(circ) != 0) { circ->buf[circ->head] = c; circ->head = (circ->head + 1) & (UART_XMIT_SIZE - 1); @@ -586,11 +588,13 @@ static int uart_write(struct tty_struct *tty, return -EL3HLT; } - circ = &state->xmit; - if (!circ->buf) - return 0; - port = uart_port_lock(state, flags); + circ = &state->xmit; + if (!circ->buf) { + uart_port_unlock(port, flags); + return 0; + } + while (port) { c = CIRC_SPACE_TO_END(circ->head, circ->tail, UART_XMIT_SIZE); if (count < c) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 23c6fd238422..21ffcce16927 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2189,7 +2189,8 @@ static int tiocsti(struct tty_struct *tty, char __user *p) ld = tty_ldisc_ref_wait(tty); if (!ld) return -EIO; - ld->ops->receive_buf(tty, &ch, &mbz, 1); + if (ld->ops->receive_buf) + ld->ops->receive_buf(tty, &ch, &mbz, 1); tty_ldisc_deref(ld); return 0; } diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 41ec8e5010f3..bba75560d11e 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1272,6 +1272,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (con_is_visible(vc)) update_screen(vc); vt_event_post(VT_EVENT_RESIZE, vc->vc_num, vc->vc_num); + notify_update(vc); return err; } @@ -2764,8 +2765,8 @@ rescan_last_byte: con_flush(vc, draw_from, draw_to, &draw_x); vc_uniscr_debug_check(vc); console_conditional_schedule(); - console_unlock(); notify_update(vc); + console_unlock(); return n; } @@ -2884,8 +2885,7 @@ static void vt_console_print(struct console *co, const char *b, unsigned count) unsigned char c; static DEFINE_SPINLOCK(printing_lock); const ushort *start; - ushort cnt = 0; - ushort myx; + ushort start_x, cnt; int kmsg_console; /* console busy or not yet initialized */ @@ -2898,10 +2898,6 @@ static void vt_console_print(struct console *co, const char *b, unsigned count) if (kmsg_console && vc_cons_allocated(kmsg_console - 1)) vc = vc_cons[kmsg_console - 1].d; - /* read `x' only after setting currcons properly (otherwise - the `x' macro will read the x of the foreground console). */ - myx = vc->vc_x; - if (!vc_cons_allocated(fg_console)) { /* impossible */ /* printk("vt_console_print: tty %d not allocated ??\n", currcons+1); */ @@ -2916,53 +2912,41 @@ static void vt_console_print(struct console *co, const char *b, unsigned count) hide_cursor(vc); start = (ushort *)vc->vc_pos; - - /* Contrived structure to try to emulate original need_wrap behaviour - * Problems caused when we have need_wrap set on '\n' character */ + start_x = vc->vc_x; + cnt = 0; while (count--) { c = *b++; if (c == 10 || c == 13 || c == 8 || vc->vc_need_wrap) { - if (cnt > 0) { - if (con_is_visible(vc)) - vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x); - vc->vc_x += cnt; - if (vc->vc_need_wrap) - vc->vc_x--; - cnt = 0; - } + if (cnt && con_is_visible(vc)) + vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, start_x); + cnt = 0; if (c == 8) { /* backspace */ bs(vc); start = (ushort *)vc->vc_pos; - myx = vc->vc_x; + start_x = vc->vc_x; continue; } if (c != 13) lf(vc); cr(vc); start = (ushort *)vc->vc_pos; - myx = vc->vc_x; + start_x = vc->vc_x; if (c == 10 || c == 13) continue; } + vc_uniscr_putc(vc, c); scr_writew((vc->vc_attr << 8) + c, (unsigned short *)vc->vc_pos); notify_write(vc, c); cnt++; - if (myx == vc->vc_cols - 1) { - vc->vc_need_wrap = 1; - continue; - } - vc->vc_pos += 2; - myx++; - } - if (cnt > 0) { - if (con_is_visible(vc)) - vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, vc->vc_x); - vc->vc_x += cnt; - if (vc->vc_x == vc->vc_cols) { - vc->vc_x--; + if (vc->vc_x == vc->vc_cols - 1) { vc->vc_need_wrap = 1; + } else { + vc->vc_pos += 2; + vc->vc_x++; } } + if (cnt && con_is_visible(vc)) + vc->vc_sw->con_putcs(vc, start, cnt, vc->vc_y, start_x); set_cursor(vc); notify_update(vc); diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index e81de9ca8729..9b45aa422e69 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -316,7 +316,8 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) if (IS_ERR(data->usbmisc_data)) return PTR_ERR(data->usbmisc_data); - if (of_usb_get_phy_mode(dev->of_node) == USBPHY_INTERFACE_MODE_HSIC) { + if ((of_usb_get_phy_mode(dev->of_node) == USBPHY_INTERFACE_MODE_HSIC) + && data->usbmisc_data) { pdata.flags |= CI_HDRC_IMX_IS_HSIC; data->usbmisc_data->hsic = 1; data->pinctrl = devm_pinctrl_get(dev); diff --git a/drivers/usb/core/ledtrig-usbport.c b/drivers/usb/core/ledtrig-usbport.c index dc7f7fd71684..c12ac56606c3 100644 --- a/drivers/usb/core/ledtrig-usbport.c +++ b/drivers/usb/core/ledtrig-usbport.c @@ -119,11 +119,6 @@ static const struct attribute_group ports_group = { .attrs = ports_attrs, }; -static const struct attribute_group *ports_groups[] = { - &ports_group, - NULL -}; - /*************************************** * Adding & removing ports ***************************************/ @@ -307,6 +302,7 @@ static int usbport_trig_notify(struct notifier_block *nb, unsigned long action, static int usbport_trig_activate(struct led_classdev *led_cdev) { struct usbport_trig_data *usbport_data; + int err; usbport_data = kzalloc(sizeof(*usbport_data), GFP_KERNEL); if (!usbport_data) @@ -315,6 +311,9 @@ static int usbport_trig_activate(struct led_classdev *led_cdev) /* List of ports */ INIT_LIST_HEAD(&usbport_data->ports); + err = sysfs_create_group(&led_cdev->dev->kobj, &ports_group); + if (err) + goto err_free; usb_for_each_dev(usbport_data, usbport_trig_add_usb_dev_ports); usbport_trig_update_count(usbport_data); @@ -322,8 +321,11 @@ static int usbport_trig_activate(struct led_classdev *led_cdev) usbport_data->nb.notifier_call = usbport_trig_notify; led_set_trigger_data(led_cdev, usbport_data); usb_register_notify(&usbport_data->nb); - return 0; + +err_free: + kfree(usbport_data); + return err; } static void usbport_trig_deactivate(struct led_classdev *led_cdev) @@ -335,6 +337,8 @@ static void usbport_trig_deactivate(struct led_classdev *led_cdev) usbport_trig_remove_port(usbport_data, port); } + sysfs_remove_group(&led_cdev->dev->kobj, &ports_group); + usb_unregister_notify(&usbport_data->nb); kfree(usbport_data); @@ -344,7 +348,6 @@ static struct led_trigger usbport_led_trigger = { .name = "usbport", .activate = usbport_trig_activate, .deactivate = usbport_trig_deactivate, - .groups = ports_groups, }; static int __init usbport_trig_init(void) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 68ad75a7460d..55ef3cc2701b 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -261,7 +261,7 @@ static void dwc2_gadget_wkup_alert_handler(struct dwc2_hsotg *hsotg) if (gintsts2 & GINTSTS2_WKUP_ALERT_INT) { dev_dbg(hsotg->dev, "%s: Wkup_Alert_Int\n", __func__); - dwc2_clear_bit(hsotg, GINTSTS2, GINTSTS2_WKUP_ALERT_INT); + dwc2_set_bit(hsotg, GINTSTS2, GINTSTS2_WKUP_ALERT_INT); dwc2_set_bit(hsotg, DCTL, DCTL_RMTWKUPSIG); } } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 07bd31bb2f8a..bed2ff42780b 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -177,6 +177,7 @@ static void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep, req->started = false; list_del(&req->list); req->remaining = 0; + req->needs_extra_trb = false; if (req->request.status == -EINPROGRESS) req->request.status = status; @@ -1984,6 +1985,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) /* begin to receive SETUP packets */ dwc->ep0state = EP0_SETUP_PHASE; + dwc->link_state = DWC3_LINK_STATE_SS_DIS; dwc3_ep0_out_start(dwc); dwc3_gadget_enable_irq(dwc); @@ -3379,6 +3381,8 @@ int dwc3_gadget_suspend(struct dwc3 *dwc) dwc3_disconnect_gadget(dwc); __dwc3_gadget_stop(dwc); + synchronize_irq(dwc->irq_gadget); + return 0; } diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c index 9cdef108fb1b..ed68a4860b7d 100644 --- a/drivers/usb/gadget/function/f_sourcesink.c +++ b/drivers/usb/gadget/function/f_sourcesink.c @@ -838,7 +838,7 @@ static struct usb_function *source_sink_alloc_func( ss = kzalloc(sizeof(*ss), GFP_KERNEL); if (!ss) - return NULL; + return ERR_PTR(-ENOMEM); ss_opts = container_of(fi, struct f_ss_opts, func_inst); diff --git a/drivers/usb/host/ehci-mv.c b/drivers/usb/host/ehci-mv.c index f26109eafdbf..66ec1fdf9fe7 100644 --- a/drivers/usb/host/ehci-mv.c +++ b/drivers/usb/host/ehci-mv.c @@ -302,3 +302,4 @@ MODULE_AUTHOR("Chao Xie "); MODULE_AUTHOR("Neil Zhang "); MODULE_ALIAS("mv-ehci"); MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(of, ehci_mv_dt_ids); diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 1ab2a6191013..77ef4c481f3c 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1783,6 +1783,10 @@ static int ftdi_set_bitmode(struct usb_serial_port *port, u8 mode) int result; u16 val; + result = usb_autopm_get_interface(serial->interface); + if (result) + return result; + val = (mode << 8) | (priv->gpio_output << 4) | priv->gpio_value; result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), @@ -1795,6 +1799,8 @@ static int ftdi_set_bitmode(struct usb_serial_port *port, u8 mode) val, result); } + usb_autopm_put_interface(serial->interface); + return result; } @@ -1846,9 +1852,15 @@ static int ftdi_read_cbus_pins(struct usb_serial_port *port) unsigned char *buf; int result; + result = usb_autopm_get_interface(serial->interface); + if (result) + return result; + buf = kmalloc(1, GFP_KERNEL); - if (!buf) + if (!buf) { + usb_autopm_put_interface(serial->interface); return -ENOMEM; + } result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), @@ -1863,6 +1875,7 @@ static int ftdi_read_cbus_pins(struct usb_serial_port *port) } kfree(buf); + usb_autopm_put_interface(serial->interface); return result; } diff --git a/drivers/usb/serial/keyspan_usa26msg.h b/drivers/usb/serial/keyspan_usa26msg.h index 09e21e84fc4e..a68f1fb25b8a 100644 --- a/drivers/usb/serial/keyspan_usa26msg.h +++ b/drivers/usb/serial/keyspan_usa26msg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* usa26msg.h diff --git a/drivers/usb/serial/keyspan_usa28msg.h b/drivers/usb/serial/keyspan_usa28msg.h index dee454c4609a..a19f3fe5d98d 100644 --- a/drivers/usb/serial/keyspan_usa28msg.h +++ b/drivers/usb/serial/keyspan_usa28msg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* usa28msg.h diff --git a/drivers/usb/serial/keyspan_usa49msg.h b/drivers/usb/serial/keyspan_usa49msg.h index 163b2dea2ec5..8c3970fdd868 100644 --- a/drivers/usb/serial/keyspan_usa49msg.h +++ b/drivers/usb/serial/keyspan_usa49msg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* usa49msg.h diff --git a/drivers/usb/serial/keyspan_usa67msg.h b/drivers/usb/serial/keyspan_usa67msg.h index 20fa3e2f7187..dcf502fdbb44 100644 --- a/drivers/usb/serial/keyspan_usa67msg.h +++ b/drivers/usb/serial/keyspan_usa67msg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* usa67msg.h diff --git a/drivers/usb/serial/keyspan_usa90msg.h b/drivers/usb/serial/keyspan_usa90msg.h index 86708ecd8735..c4ca0f631d20 100644 --- a/drivers/usb/serial/keyspan_usa90msg.h +++ b/drivers/usb/serial/keyspan_usa90msg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* usa90msg.h diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 98e7a5df0f6d..bb3f9aa4a909 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -46,6 +46,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_HCR331) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MOTOROLA) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ZTEK) }, + { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_TB) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID), diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 4e2554d55362..559941ca884d 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -8,6 +8,7 @@ #define PL2303_VENDOR_ID 0x067b #define PL2303_PRODUCT_ID 0x2303 +#define PL2303_PRODUCT_ID_TB 0x2304 #define PL2303_PRODUCT_ID_RSAQ2 0x04bb #define PL2303_PRODUCT_ID_DCU11 0x1234 #define PL2303_PRODUCT_ID_PHAROS 0xaaa0 @@ -20,6 +21,7 @@ #define PL2303_PRODUCT_ID_MOTOROLA 0x0307 #define PL2303_PRODUCT_ID_ZTEK 0xe1f1 + #define ATEN_VENDOR_ID 0x0557 #define ATEN_VENDOR_ID2 0x0547 #define ATEN_PRODUCT_ID 0x2008 diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 4d0273508043..edbbb13d6de6 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -85,7 +85,8 @@ DEVICE(moto_modem, MOTO_IDS); /* Motorola Tetra driver */ #define MOTOROLA_TETRA_IDS() \ { USB_DEVICE(0x0cad, 0x9011) }, /* Motorola Solutions TETRA PEI */ \ - { USB_DEVICE(0x0cad, 0x9012) } /* MTP6550 */ + { USB_DEVICE(0x0cad, 0x9012) }, /* MTP6550 */ \ + { USB_DEVICE(0x0cad, 0x9016) } /* TPG2200 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); /* Novatel Wireless GPS driver */ diff --git a/drivers/usb/usbip/README b/drivers/usb/usbip/README deleted file mode 100644 index 41a2cf2e77a6..000000000000 --- a/drivers/usb/usbip/README +++ /dev/null @@ -1,7 +0,0 @@ -TODO: - - more discussion about the protocol - - testing - - review of the userspace interface - - document the protocol - -Please send patches for this code to Greg Kroah-Hartman diff --git a/drivers/vfio/pci/trace.h b/drivers/vfio/pci/trace.h index 4d13e510590e..b2aa986ab9ed 100644 --- a/drivers/vfio/pci/trace.h +++ b/drivers/vfio/pci/trace.h @@ -1,13 +1,9 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * VFIO PCI mmap/mmap_fault tracepoints * * Copyright (C) 2018 IBM Corp. All rights reserved. * Author: Alexey Kardashevskiy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #undef TRACE_SYSTEM diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c index 054a2cf9dd8e..32f695ffe128 100644 --- a/drivers/vfio/pci/vfio_pci_nvlink2.c +++ b/drivers/vfio/pci/vfio_pci_nvlink2.c @@ -1,14 +1,10 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: GPL-2.0-only /* * VFIO PCI NVIDIA Whitherspoon GPU support a.k.a. NVLink2. * * Copyright (C) 2018 IBM Corp. All rights reserved. * Author: Alexey Kardashevskiy * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * * Register an on-GPU RAM region for cacheable access. * * Derived from original vfio_pci_igd.c: @@ -178,11 +174,11 @@ static int vfio_pci_nvgpu_add_capability(struct vfio_pci_device *vdev, struct vfio_pci_region *region, struct vfio_info_cap *caps) { struct vfio_pci_nvgpu_data *data = region->data; - struct vfio_region_info_cap_nvlink2_ssatgt cap = { 0 }; - - cap.header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT; - cap.header.version = 1; - cap.tgt = data->gpu_tgt; + struct vfio_region_info_cap_nvlink2_ssatgt cap = { + .header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT, + .header.version = 1, + .tgt = data->gpu_tgt + }; return vfio_info_add_capability(caps, &cap.header, sizeof(cap)); } @@ -365,18 +361,18 @@ static int vfio_pci_npu2_add_capability(struct vfio_pci_device *vdev, struct vfio_pci_region *region, struct vfio_info_cap *caps) { struct vfio_pci_npu2_data *data = region->data; - struct vfio_region_info_cap_nvlink2_ssatgt captgt = { 0 }; - struct vfio_region_info_cap_nvlink2_lnkspd capspd = { 0 }; + struct vfio_region_info_cap_nvlink2_ssatgt captgt = { + .header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT, + .header.version = 1, + .tgt = data->gpu_tgt + }; + struct vfio_region_info_cap_nvlink2_lnkspd capspd = { + .header.id = VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD, + .header.version = 1, + .link_speed = data->link_speed + }; int ret; - captgt.header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT; - captgt.header.version = 1; - captgt.tgt = data->gpu_tgt; - - capspd.header.id = VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD; - capspd.header.version = 1; - capspd.link_speed = data->link_speed; - ret = vfio_info_add_capability(caps, &captgt.header, sizeof(captgt)); if (ret) return ret; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 36f3d0f49e60..df51a35cf537 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1236,7 +1236,8 @@ static void handle_rx(struct vhost_net *net) if (nvq->done_idx > VHOST_NET_BATCH) vhost_net_signal_used(nvq); if (unlikely(vq_log)) - vhost_log_write(vq, vq_log, log, vhost_len); + vhost_log_write(vq, vq_log, log, vhost_len, + vq->iov, in); total_len += vhost_len; if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) { vhost_poll_queue(&vq->poll); @@ -1336,7 +1337,8 @@ static int vhost_net_open(struct inode *inode, struct file *f) n->vqs[i].rx_ring = NULL; vhost_net_buf_init(&n->vqs[i].rxq); } - vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); + vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, + UIO_MAXIOV + VHOST_NET_BATCH); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 8e10ab436d1f..23593cb23dd0 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1127,16 +1127,18 @@ vhost_scsi_send_tmf_reject(struct vhost_scsi *vs, struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc) { - struct virtio_scsi_ctrl_tmf_resp __user *resp; struct virtio_scsi_ctrl_tmf_resp rsp; + struct iov_iter iov_iter; int ret; pr_debug("%s\n", __func__); memset(&rsp, 0, sizeof(rsp)); rsp.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; - resp = vq->iov[vc->out].iov_base; - ret = __copy_to_user(resp, &rsp, sizeof(rsp)); - if (!ret) + + iov_iter_init(&iov_iter, READ, &vq->iov[vc->out], vc->in, sizeof(rsp)); + + ret = copy_to_iter(&rsp, sizeof(rsp), &iov_iter); + if (likely(ret == sizeof(rsp))) vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0); else pr_err("Faulted on virtio_scsi_ctrl_tmf_resp\n"); @@ -1147,16 +1149,18 @@ vhost_scsi_send_an_resp(struct vhost_scsi *vs, struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc) { - struct virtio_scsi_ctrl_an_resp __user *resp; struct virtio_scsi_ctrl_an_resp rsp; + struct iov_iter iov_iter; int ret; pr_debug("%s\n", __func__); memset(&rsp, 0, sizeof(rsp)); /* event_actual = 0 */ rsp.response = VIRTIO_SCSI_S_OK; - resp = vq->iov[vc->out].iov_base; - ret = __copy_to_user(resp, &rsp, sizeof(rsp)); - if (!ret) + + iov_iter_init(&iov_iter, READ, &vq->iov[vc->out], vc->in, sizeof(rsp)); + + ret = copy_to_iter(&rsp, sizeof(rsp), &iov_iter); + if (likely(ret == sizeof(rsp))) vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0); else pr_err("Faulted on virtio_scsi_ctrl_an_resp\n"); @@ -1623,7 +1627,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vqs[i] = &vs->vqs[i].vq; vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } - vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ); + vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV); vhost_scsi_init_inflight(vs, NULL); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 9f7942cbcbb2..24a129fcdd61 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -390,9 +390,9 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) vq->indirect = kmalloc_array(UIO_MAXIOV, sizeof(*vq->indirect), GFP_KERNEL); - vq->log = kmalloc_array(UIO_MAXIOV, sizeof(*vq->log), + vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log), GFP_KERNEL); - vq->heads = kmalloc_array(UIO_MAXIOV, sizeof(*vq->heads), + vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads), GFP_KERNEL); if (!vq->indirect || !vq->log || !vq->heads) goto err_nomem; @@ -414,7 +414,7 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev) } void vhost_dev_init(struct vhost_dev *dev, - struct vhost_virtqueue **vqs, int nvqs) + struct vhost_virtqueue **vqs, int nvqs, int iov_limit) { struct vhost_virtqueue *vq; int i; @@ -427,6 +427,7 @@ void vhost_dev_init(struct vhost_dev *dev, dev->iotlb = NULL; dev->mm = NULL; dev->worker = NULL; + dev->iov_limit = iov_limit; init_llist_head(&dev->work_list); init_waitqueue_head(&dev->wait); INIT_LIST_HEAD(&dev->read_list); @@ -1034,8 +1035,10 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, int type, ret; ret = copy_from_iter(&type, sizeof(type), from); - if (ret != sizeof(type)) + if (ret != sizeof(type)) { + ret = -EINVAL; goto done; + } switch (type) { case VHOST_IOTLB_MSG: @@ -1054,8 +1057,10 @@ ssize_t vhost_chr_write_iter(struct vhost_dev *dev, iov_iter_advance(from, offset); ret = copy_from_iter(&msg, sizeof(msg), from); - if (ret != sizeof(msg)) + if (ret != sizeof(msg)) { + ret = -EINVAL; goto done; + } if (vhost_process_iotlb_msg(dev, &msg)) { ret = -EFAULT; goto done; @@ -1733,13 +1738,87 @@ static int log_write(void __user *log_base, return r; } +static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) +{ + struct vhost_umem *umem = vq->umem; + struct vhost_umem_node *u; + u64 start, end, l, min; + int r; + bool hit = false; + + while (len) { + min = len; + /* More than one GPAs can be mapped into a single HVA. So + * iterate all possible umems here to be safe. + */ + list_for_each_entry(u, &umem->umem_list, link) { + if (u->userspace_addr > hva - 1 + len || + u->userspace_addr - 1 + u->size < hva) + continue; + start = max(u->userspace_addr, hva); + end = min(u->userspace_addr - 1 + u->size, + hva - 1 + len); + l = end - start + 1; + r = log_write(vq->log_base, + u->start + start - u->userspace_addr, + l); + if (r < 0) + return r; + hit = true; + min = min(l, min); + } + + if (!hit) + return -EFAULT; + + len -= min; + hva += min; + } + + return 0; +} + +static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) +{ + struct iovec iov[64]; + int i, ret; + + if (!vq->iotlb) + return log_write(vq->log_base, vq->log_addr + used_offset, len); + + ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, + len, iov, 64, VHOST_ACCESS_WO); + if (ret) + return ret; + + for (i = 0; i < ret; i++) { + ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base, + iov[i].iov_len); + if (ret) + return ret; + } + + return 0; +} + int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, - unsigned int log_num, u64 len) + unsigned int log_num, u64 len, struct iovec *iov, int count) { int i, r; /* Make sure data written is seen before log. */ smp_wmb(); + + if (vq->iotlb) { + for (i = 0; i < count; i++) { + r = log_write_hva(vq, (uintptr_t)iov[i].iov_base, + iov[i].iov_len); + if (r < 0) + return r; + } + return 0; + } + for (i = 0; i < log_num; ++i) { u64 l = min(log[i].len, len); r = log_write(vq->log_base, log[i].addr, l); @@ -1769,9 +1848,8 @@ static int vhost_update_used_flags(struct vhost_virtqueue *vq) smp_wmb(); /* Log used flag write. */ used = &vq->used->flags; - log_write(vq->log_base, vq->log_addr + - (used - (void __user *)vq->used), - sizeof vq->used->flags); + log_used(vq, (used - (void __user *)vq->used), + sizeof vq->used->flags); if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } @@ -1789,9 +1867,8 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) smp_wmb(); /* Log avail event write */ used = vhost_avail_event(vq); - log_write(vq->log_base, vq->log_addr + - (used - (void __user *)vq->used), - sizeof *vhost_avail_event(vq)); + log_used(vq, (used - (void __user *)vq->used), + sizeof *vhost_avail_event(vq)); if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } @@ -2191,10 +2268,8 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, /* Make sure data is seen before log. */ smp_wmb(); /* Log used ring entry write. */ - log_write(vq->log_base, - vq->log_addr + - ((void __user *)used - (void __user *)vq->used), - count * sizeof *used); + log_used(vq, ((void __user *)used - (void __user *)vq->used), + count * sizeof *used); } old = vq->last_used_idx; new = (vq->last_used_idx += count); @@ -2236,9 +2311,8 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, /* Make sure used idx is seen before log. */ smp_wmb(); /* Log used index update. */ - log_write(vq->log_base, - vq->log_addr + offsetof(struct vring_used, idx), - sizeof vq->used->idx); + log_used(vq, offsetof(struct vring_used, idx), + sizeof vq->used->idx); if (vq->log_ctx) eventfd_signal(vq->log_ctx, 1); } diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 466ef7542291..9490e7ddb340 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -170,9 +170,11 @@ struct vhost_dev { struct list_head read_list; struct list_head pending_list; wait_queue_head_t wait; + int iov_limit; }; -void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); +void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, + int nvqs, int iov_limit); long vhost_dev_set_owner(struct vhost_dev *dev); bool vhost_dev_has_owner(struct vhost_dev *dev); long vhost_dev_check_owner(struct vhost_dev *); @@ -205,7 +207,8 @@ bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *); bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, - unsigned int log_num, u64 len); + unsigned int log_num, u64 len, + struct iovec *iov, int count); int vq_iotlb_prefetch(struct vhost_virtqueue *vq); struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 3fbc068eaa9b..bb5fc0e9fbc2 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -531,7 +531,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; - vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); + vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV); file->private_data = vsock; spin_lock_init(&vsock->send_pkt_list_lock); diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 09731b2f6815..c6b3bdbbdbc9 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -271,6 +271,7 @@ static void vgacon_scrollback_update(struct vc_data *c, int t, int count) static void vgacon_restore_screen(struct vc_data *c) { + c->vc_origin = c->vc_visible_origin; vgacon_scrollback_cur->save = 0; if (!vga_is_gfx && !vgacon_scrollback_cur->restore) { @@ -287,8 +288,7 @@ static void vgacon_scrolldelta(struct vc_data *c, int lines) int start, end, count, soff; if (!lines) { - c->vc_visible_origin = c->vc_origin; - vga_set_mem_top(c); + vgacon_restore_screen(c); return; } @@ -298,6 +298,7 @@ static void vgacon_scrolldelta(struct vc_data *c, int lines) if (!vgacon_scrollback_cur->save) { vgacon_cursor(c, CM_ERASE); vgacon_save_screen(c); + c->vc_origin = (unsigned long)c->vc_screenbuf; vgacon_scrollback_cur->save = 1; } @@ -335,7 +336,7 @@ static void vgacon_scrolldelta(struct vc_data *c, int lines) int copysize; int diff = c->vc_rows - count; - void *d = (void *) c->vc_origin; + void *d = (void *) c->vc_visible_origin; void *s = (void *) c->vc_screenbuf; count *= c->vc_size_row; diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 8976190b6c1f..bfa1360ec750 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -510,6 +510,13 @@ static int __init fb_console_setup(char *this_opt) continue; } #endif + + if (!strncmp(options, "logo-pos:", 9)) { + options += 9; + if (!strcmp(options, "center")) + fb_center_logo = true; + continue; + } } return 1; } diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 558ed2ed3124..cb43a2258c51 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -53,6 +53,9 @@ EXPORT_SYMBOL(registered_fb); int num_registered_fb __read_mostly; EXPORT_SYMBOL(num_registered_fb); +bool fb_center_logo __read_mostly; +EXPORT_SYMBOL(fb_center_logo); + static struct fb_info *get_fb_info(unsigned int idx) { struct fb_info *fb_info; @@ -506,8 +509,7 @@ static int fb_show_logo_line(struct fb_info *info, int rotate, fb_set_logo(info, logo, logo_new, fb_logo.depth); } -#ifdef CONFIG_FB_LOGO_CENTER - { + if (fb_center_logo) { int xres = info->var.xres; int yres = info->var.yres; @@ -520,11 +522,11 @@ static int fb_show_logo_line(struct fb_info *info, int rotate, --n; image.dx = (xres - n * (logo->width + 8) - 8) / 2; image.dy = y ?: (yres - logo->height) / 2; + } else { + image.dx = 0; + image.dy = y; } -#else - image.dx = 0; - image.dy = y; -#endif + image.width = logo->width; image.height = logo->height; @@ -684,9 +686,8 @@ int fb_prepare_logo(struct fb_info *info, int rotate) } height = fb_logo.logo->height; -#ifdef CONFIG_FB_LOGO_CENTER - height += (yres - fb_logo.logo->height) / 2; -#endif + if (fb_center_logo) + height += (yres - fb_logo.logo->height) / 2; return fb_prepare_extra_logos(info, height, yres); } diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c index 31f769d67195..057d3cdef92e 100644 --- a/drivers/video/fbdev/offb.c +++ b/drivers/video/fbdev/offb.c @@ -318,28 +318,28 @@ static void __iomem *offb_map_reg(struct device_node *np, int index, } static void offb_init_palette_hacks(struct fb_info *info, struct device_node *dp, - const char *name, unsigned long address) + unsigned long address) { struct offb_par *par = (struct offb_par *) info->par; - if (dp && !strncmp(name, "ATY,Rage128", 11)) { + if (of_node_name_prefix(dp, "ATY,Rage128")) { par->cmap_adr = offb_map_reg(dp, 2, 0, 0x1fff); if (par->cmap_adr) par->cmap_type = cmap_r128; - } else if (dp && (!strncmp(name, "ATY,RageM3pA", 12) - || !strncmp(name, "ATY,RageM3p12A", 14))) { + } else if (of_node_name_prefix(dp, "ATY,RageM3pA") || + of_node_name_prefix(dp, "ATY,RageM3p12A")) { par->cmap_adr = offb_map_reg(dp, 2, 0, 0x1fff); if (par->cmap_adr) par->cmap_type = cmap_M3A; - } else if (dp && !strncmp(name, "ATY,RageM3pB", 12)) { + } else if (of_node_name_prefix(dp, "ATY,RageM3pB")) { par->cmap_adr = offb_map_reg(dp, 2, 0, 0x1fff); if (par->cmap_adr) par->cmap_type = cmap_M3B; - } else if (dp && !strncmp(name, "ATY,Rage6", 9)) { + } else if (of_node_name_prefix(dp, "ATY,Rage6")) { par->cmap_adr = offb_map_reg(dp, 1, 0, 0x1fff); if (par->cmap_adr) par->cmap_type = cmap_radeon; - } else if (!strncmp(name, "ATY,", 4)) { + } else if (of_node_name_prefix(dp, "ATY,")) { unsigned long base = address & 0xff000000UL; par->cmap_adr = ioremap(base + 0x7ff000, 0x1000) + 0xcc0; @@ -350,7 +350,7 @@ static void offb_init_palette_hacks(struct fb_info *info, struct device_node *dp par->cmap_adr = offb_map_reg(dp, 0, 0x6000, 0x1000); if (par->cmap_adr) par->cmap_type = cmap_gxt2000; - } else if (dp && !strncmp(name, "vga,Display-", 12)) { + } else if (of_node_name_prefix(dp, "vga,Display-")) { /* Look for AVIVO initialized by SLOF */ struct device_node *pciparent = of_get_parent(dp); const u32 *vid, *did; @@ -438,7 +438,7 @@ static void __init offb_init_fb(const char *name, par->cmap_type = cmap_unknown; if (depth == 8) - offb_init_palette_hacks(info, dp, name, address); + offb_init_palette_hacks(info, dp, address); else fix->visual = FB_VISUAL_TRUECOLOR; diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c index 53f93616c671..8e23160ec59f 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c +++ b/drivers/video/fbdev/omap2/omapfb/omapfb-ioctl.c @@ -609,6 +609,8 @@ int omapfb_ioctl(struct fb_info *fbi, unsigned int cmd, unsigned long arg) int r = 0; + memset(&p, 0, sizeof(p)); + switch (cmd) { case OMAPFB_SYNC_GFX: DBG("ioctl SYNC_GFX\n"); diff --git a/drivers/video/logo/Kconfig b/drivers/video/logo/Kconfig index 1e972c4e88b1..d1f6196c8b9a 100644 --- a/drivers/video/logo/Kconfig +++ b/drivers/video/logo/Kconfig @@ -10,15 +10,6 @@ menuconfig LOGO if LOGO -config FB_LOGO_CENTER - bool "Center the logo" - depends on FB=y - help - When this option is selected, the bootup logo is centered both - horizontally and vertically. If more than one logo is displayed - due to multiple CPUs, the collected line of logos is centered - as a whole. - config FB_LOGO_EXTRA bool depends on FB=y diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 728ecd1eea30..fb12fe205f86 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -61,6 +61,10 @@ enum virtio_balloon_vq { VIRTIO_BALLOON_VQ_MAX }; +enum virtio_balloon_config_read { + VIRTIO_BALLOON_CONFIG_READ_CMD_ID = 0, +}; + struct virtio_balloon { struct virtio_device *vdev; struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; @@ -77,14 +81,20 @@ struct virtio_balloon { /* Prevent updating balloon when it is being canceled. */ spinlock_t stop_update_lock; bool stop_update; + /* Bitmap to indicate if reading the related config fields are needed */ + unsigned long config_read_bitmap; /* The list of allocated free pages, waiting to be given back to mm */ struct list_head free_page_list; spinlock_t free_page_list_lock; /* The number of free page blocks on the above list */ unsigned long num_free_page_blocks; - /* The cmd id received from host */ - u32 cmd_id_received; + /* + * The cmd id received from host. + * Read it via virtio_balloon_cmd_id_received to get the latest value + * sent from host. + */ + u32 cmd_id_received_cache; /* The cmd id that is actively in use */ __virtio32 cmd_id_active; /* Buffer to store the stop sign */ @@ -390,37 +400,31 @@ static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb, return num_returned; } +static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb) +{ + if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + return; + + /* No need to queue the work if the bit was already set. */ + if (test_and_set_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID, + &vb->config_read_bitmap)) + return; + + queue_work(vb->balloon_wq, &vb->report_free_page_work); +} + static void virtballoon_changed(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; unsigned long flags; - s64 diff = towards_target(vb); - if (diff) { - spin_lock_irqsave(&vb->stop_update_lock, flags); - if (!vb->stop_update) - queue_work(system_freezable_wq, - &vb->update_balloon_size_work); - spin_unlock_irqrestore(&vb->stop_update_lock, flags); - } - - if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { - virtio_cread(vdev, struct virtio_balloon_config, - free_page_report_cmd_id, &vb->cmd_id_received); - if (vb->cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) { - /* Pass ULONG_MAX to give back all the free pages */ - return_free_pages_to_mm(vb, ULONG_MAX); - } else if (vb->cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP && - vb->cmd_id_received != - virtio32_to_cpu(vdev, vb->cmd_id_active)) { - spin_lock_irqsave(&vb->stop_update_lock, flags); - if (!vb->stop_update) { - queue_work(vb->balloon_wq, - &vb->report_free_page_work); - } - spin_unlock_irqrestore(&vb->stop_update_lock, flags); - } + spin_lock_irqsave(&vb->stop_update_lock, flags); + if (!vb->stop_update) { + queue_work(system_freezable_wq, + &vb->update_balloon_size_work); + virtio_balloon_queue_free_page_work(vb); } + spin_unlock_irqrestore(&vb->stop_update_lock, flags); } static void update_balloon_size(struct virtio_balloon *vb) @@ -527,6 +531,17 @@ static int init_vqs(struct virtio_balloon *vb) return 0; } +static u32 virtio_balloon_cmd_id_received(struct virtio_balloon *vb) +{ + if (test_and_clear_bit(VIRTIO_BALLOON_CONFIG_READ_CMD_ID, + &vb->config_read_bitmap)) + virtio_cread(vb->vdev, struct virtio_balloon_config, + free_page_report_cmd_id, + &vb->cmd_id_received_cache); + + return vb->cmd_id_received_cache; +} + static int send_cmd_id_start(struct virtio_balloon *vb) { struct scatterlist sg; @@ -537,7 +552,8 @@ static int send_cmd_id_start(struct virtio_balloon *vb) while (virtqueue_get_buf(vq, &unused)) ; - vb->cmd_id_active = cpu_to_virtio32(vb->vdev, vb->cmd_id_received); + vb->cmd_id_active = virtio32_to_cpu(vb->vdev, + virtio_balloon_cmd_id_received(vb)); sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active)); err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL); if (!err) @@ -620,7 +636,8 @@ static int send_free_pages(struct virtio_balloon *vb) * stop the reporting. */ cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active); - if (cmd_id_active != vb->cmd_id_received) + if (unlikely(cmd_id_active != + virtio_balloon_cmd_id_received(vb))) break; /* @@ -637,11 +654,9 @@ static int send_free_pages(struct virtio_balloon *vb) return 0; } -static void report_free_page_func(struct work_struct *work) +static void virtio_balloon_report_free_page(struct virtio_balloon *vb) { int err; - struct virtio_balloon *vb = container_of(work, struct virtio_balloon, - report_free_page_work); struct device *dev = &vb->vdev->dev; /* Start by sending the received cmd id to host with an outbuf. */ @@ -659,6 +674,23 @@ static void report_free_page_func(struct work_struct *work) dev_err(dev, "Failed to send a stop id, err = %d\n", err); } +static void report_free_page_func(struct work_struct *work) +{ + struct virtio_balloon *vb = container_of(work, struct virtio_balloon, + report_free_page_work); + u32 cmd_id_received; + + cmd_id_received = virtio_balloon_cmd_id_received(vb); + if (cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) { + /* Pass ULONG_MAX to give back all the free pages */ + return_free_pages_to_mm(vb, ULONG_MAX); + } else if (cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP && + cmd_id_received != + virtio32_to_cpu(vb->vdev, vb->cmd_id_active)) { + virtio_balloon_report_free_page(vb); + } +} + #ifdef CONFIG_BALLOON_COMPACTION /* * virtballoon_migratepage - perform the balloon page migration on behalf of @@ -885,7 +917,7 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_del_vqs; } INIT_WORK(&vb->report_free_page_work, report_free_page_func); - vb->cmd_id_received = VIRTIO_BALLOON_CMD_ID_STOP; + vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP; vb->cmd_id_active = cpu_to_virtio32(vb->vdev, VIRTIO_BALLOON_CMD_ID_STOP); vb->cmd_id_stop = cpu_to_virtio32(vb->vdev, diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 4cd9ea5c75be..d9dd0f789279 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -468,7 +468,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); unsigned int irq = platform_get_irq(vm_dev->pdev, 0); - int i, err; + int i, err, queue_idx = 0; err = request_irq(irq, vm_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vm_dev); @@ -476,7 +476,12 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, return err; for (i = 0; i < nvqs; ++i) { - vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i], + if (!names[i]) { + vqs[i] = NULL; + continue; + } + + vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i], ctx ? ctx[i] : false); if (IS_ERR(vqs[i])) { vm_del_vqs(vdev); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 465a6f5142cc..d0584c040c60 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -285,7 +285,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, { struct virtio_pci_device *vp_dev = to_vp_device(vdev); u16 msix_vec; - int i, err, nvectors, allocated_vectors; + int i, err, nvectors, allocated_vectors, queue_idx = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) @@ -321,7 +321,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, msix_vec = allocated_vectors++; else msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], + vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i], ctx ? ctx[i] : false, msix_vec); if (IS_ERR(vqs[i])) { @@ -356,7 +356,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, const char * const names[], const bool *ctx) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - int i, err; + int i, err, queue_idx = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) @@ -374,7 +374,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, vqs[i] = NULL; continue; } - vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], + vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i], ctx ? ctx[i] : false, VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { diff --git a/drivers/watchdog/mt7621_wdt.c b/drivers/watchdog/mt7621_wdt.c index 5c4a764717c4..81208cd3f4ec 100644 --- a/drivers/watchdog/mt7621_wdt.c +++ b/drivers/watchdog/mt7621_wdt.c @@ -17,6 +17,7 @@ #include #include #include +#include #include diff --git a/drivers/watchdog/rt2880_wdt.c b/drivers/watchdog/rt2880_wdt.c index 98967f0a7d10..db7c57d82cfd 100644 --- a/drivers/watchdog/rt2880_wdt.c +++ b/drivers/watchdog/rt2880_wdt.c @@ -18,6 +18,7 @@ #include #include #include +#include #include diff --git a/drivers/watchdog/tqmx86_wdt.c b/drivers/watchdog/tqmx86_wdt.c index 0d3a0fbbd7a5..52941207a12a 100644 --- a/drivers/watchdog/tqmx86_wdt.c +++ b/drivers/watchdog/tqmx86_wdt.c @@ -79,13 +79,13 @@ static int tqmx86_wdt_probe(struct platform_device *pdev) return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_IO, 0); - if (IS_ERR(res)) - return PTR_ERR(res); + if (!res) + return -ENODEV; priv->io_base = devm_ioport_map(&pdev->dev, res->start, resource_size(res)); - if (IS_ERR(priv->io_base)) - return PTR_ERR(priv->io_base); + if (!priv->io_base) + return -ENOMEM; watchdog_set_drvdata(&priv->wdd, priv); diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 93194f3e7540..117e76b2f939 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1650,7 +1650,7 @@ void xen_callback_vector(void) xen_have_vector_callback = 0; return; } - pr_info("Xen HVM callback vector for event delivery is enabled\n"); + pr_info_once("Xen HVM callback vector for event delivery is enabled\n"); alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, xen_hvm_callback_vector); } diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 2e5d845b5091..7aa64d1b119c 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -160,9 +160,10 @@ static void pvcalls_conn_back_read(void *opaque) /* write the data, then modify the indexes */ virt_wmb(); - if (ret < 0) + if (ret < 0) { + atomic_set(&map->read, 0); intf->in_error = ret; - else + } else intf->in_prod = prod + ret; /* update the indexes, then notify the other end */ virt_wmb(); @@ -282,13 +283,11 @@ static int pvcalls_back_socket(struct xenbus_device *dev, static void pvcalls_sk_state_change(struct sock *sock) { struct sock_mapping *map = sock->sk_user_data; - struct pvcalls_data_intf *intf; if (map == NULL) return; - intf = map->ring; - intf->in_error = -ENOTCONN; + atomic_inc(&map->read); notify_remote_via_irq(map->irq); } diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 77224d8f3e6f..8a249c95c193 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -31,6 +31,12 @@ #define PVCALLS_NR_RSP_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE) #define PVCALLS_FRONT_MAX_SPIN 5000 +static struct proto pvcalls_proto = { + .name = "PVCalls", + .owner = THIS_MODULE, + .obj_size = sizeof(struct sock), +}; + struct pvcalls_bedata { struct xen_pvcalls_front_ring ring; grant_ref_t ref; @@ -335,6 +341,42 @@ int pvcalls_front_socket(struct socket *sock) return ret; } +static void free_active_ring(struct sock_mapping *map) +{ + if (!map->active.ring) + return; + + free_pages((unsigned long)map->active.data.in, + map->active.ring->ring_order); + free_page((unsigned long)map->active.ring); +} + +static int alloc_active_ring(struct sock_mapping *map) +{ + void *bytes; + + map->active.ring = (struct pvcalls_data_intf *) + get_zeroed_page(GFP_KERNEL); + if (!map->active.ring) + goto out; + + map->active.ring->ring_order = PVCALLS_RING_ORDER; + bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + PVCALLS_RING_ORDER); + if (!bytes) + goto out; + + map->active.data.in = bytes; + map->active.data.out = bytes + + XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER); + + return 0; + +out: + free_active_ring(map); + return -ENOMEM; +} + static int create_active(struct sock_mapping *map, int *evtchn) { void *bytes; @@ -343,15 +385,7 @@ static int create_active(struct sock_mapping *map, int *evtchn) *evtchn = -1; init_waitqueue_head(&map->active.inflight_conn_req); - map->active.ring = (struct pvcalls_data_intf *) - __get_free_page(GFP_KERNEL | __GFP_ZERO); - if (map->active.ring == NULL) - goto out_error; - map->active.ring->ring_order = PVCALLS_RING_ORDER; - bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - PVCALLS_RING_ORDER); - if (bytes == NULL) - goto out_error; + bytes = map->active.data.in; for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++) map->active.ring->ref[i] = gnttab_grant_foreign_access( pvcalls_front_dev->otherend_id, @@ -361,10 +395,6 @@ static int create_active(struct sock_mapping *map, int *evtchn) pvcalls_front_dev->otherend_id, pfn_to_gfn(virt_to_pfn((void *)map->active.ring)), 0); - map->active.data.in = bytes; - map->active.data.out = bytes + - XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER); - ret = xenbus_alloc_evtchn(pvcalls_front_dev, evtchn); if (ret) goto out_error; @@ -385,8 +415,6 @@ static int create_active(struct sock_mapping *map, int *evtchn) out_error: if (*evtchn >= 0) xenbus_free_evtchn(pvcalls_front_dev, *evtchn); - free_pages((unsigned long)map->active.data.in, PVCALLS_RING_ORDER); - free_page((unsigned long)map->active.ring); return ret; } @@ -406,17 +434,24 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr, return PTR_ERR(map); bedata = dev_get_drvdata(&pvcalls_front_dev->dev); + ret = alloc_active_ring(map); + if (ret < 0) { + pvcalls_exit_sock(sock); + return ret; + } spin_lock(&bedata->socket_lock); ret = get_request(bedata, &req_id); if (ret < 0) { spin_unlock(&bedata->socket_lock); + free_active_ring(map); pvcalls_exit_sock(sock); return ret; } ret = create_active(map, &evtchn); if (ret < 0) { spin_unlock(&bedata->socket_lock); + free_active_ring(map); pvcalls_exit_sock(sock); return ret; } @@ -469,8 +504,10 @@ static int __write_ring(struct pvcalls_data_intf *intf, virt_mb(); size = pvcalls_queued(prod, cons, array_size); - if (size >= array_size) + if (size > array_size) return -EINVAL; + if (size == array_size) + return 0; if (len > array_size - size) len = array_size - size; @@ -560,15 +597,13 @@ static int __read_ring(struct pvcalls_data_intf *intf, error = intf->in_error; /* get pointers before reading from the ring */ virt_rmb(); - if (error < 0) - return error; size = pvcalls_queued(prod, cons, array_size); masked_prod = pvcalls_mask(prod, array_size); masked_cons = pvcalls_mask(cons, array_size); if (size == 0) - return 0; + return error ?: size; if (len > size) len = size; @@ -780,25 +815,36 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) } } + map2 = kzalloc(sizeof(*map2), GFP_KERNEL); + if (map2 == NULL) { + clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, + (void *)&map->passive.flags); + pvcalls_exit_sock(sock); + return -ENOMEM; + } + ret = alloc_active_ring(map2); + if (ret < 0) { + clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, + (void *)&map->passive.flags); + kfree(map2); + pvcalls_exit_sock(sock); + return ret; + } spin_lock(&bedata->socket_lock); ret = get_request(bedata, &req_id); if (ret < 0) { clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags); spin_unlock(&bedata->socket_lock); + free_active_ring(map2); + kfree(map2); pvcalls_exit_sock(sock); return ret; } - map2 = kzalloc(sizeof(*map2), GFP_ATOMIC); - if (map2 == NULL) { - clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, - (void *)&map->passive.flags); - spin_unlock(&bedata->socket_lock); - pvcalls_exit_sock(sock); - return -ENOMEM; - } + ret = create_active(map2, &evtchn); if (ret < 0) { + free_active_ring(map2); kfree(map2); clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT, (void *)&map->passive.flags); @@ -839,7 +885,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags) received: map2->sock = newsock; - newsock->sk = kzalloc(sizeof(*newsock->sk), GFP_KERNEL); + newsock->sk = sk_alloc(sock_net(sock->sk), PF_INET, GFP_KERNEL, &pvcalls_proto, false); if (!newsock->sk) { bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID; map->passive.inflight_req_id = PVCALLS_INVALID_ID; @@ -1032,8 +1078,8 @@ int pvcalls_front_release(struct socket *sock) spin_lock(&bedata->socket_lock); list_del(&map->list); spin_unlock(&bedata->socket_lock); - if (READ_ONCE(map->passive.inflight_req_id) != - PVCALLS_INVALID_ID) { + if (READ_ONCE(map->passive.inflight_req_id) != PVCALLS_INVALID_ID && + READ_ONCE(map->passive.inflight_req_id) != 0) { pvcalls_front_free_map(bedata, map->passive.accept_map); } diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 989cf872b98c..bb7888429be6 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -645,7 +645,7 @@ xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { -#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) +#ifdef CONFIG_ARM if (xen_get_dma_ops(dev)->mmap) return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); @@ -662,7 +662,7 @@ xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t handle, size_t size, unsigned long attrs) { -#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) +#ifdef CONFIG_ARM if (xen_get_dma_ops(dev)->get_sgtable) { #if 0 /* diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 0568fd986821..e432bd27a2e7 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -208,7 +208,7 @@ again: /* The new front of the queue now owns the state variables. */ next = list_entry(vnode->pending_locks.next, struct file_lock, fl_u.afs.link); - vnode->lock_key = afs_file_key(next->fl_file); + vnode->lock_key = key_get(afs_file_key(next->fl_file)); vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB; goto again; @@ -413,7 +413,7 @@ static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl) /* The new front of the queue now owns the state variables. */ next = list_entry(vnode->pending_locks.next, struct file_lock, fl_u.afs.link); - vnode->lock_key = afs_file_key(next->fl_file); + vnode->lock_key = key_get(afs_file_key(next->fl_file)); vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB; afs_lock_may_be_available(vnode); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 6b17d3620414..1a4ce07fb406 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -414,7 +414,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { valid = true; } else { - vnode->cb_s_break = vnode->cb_interest->server->cb_s_break; vnode->cb_v_break = vnode->volume->cb_v_break; valid = false; } @@ -546,6 +545,8 @@ void afs_evict_inode(struct inode *inode) #endif afs_put_permits(rcu_access_pointer(vnode->permit_cache)); + key_put(vnode->lock_key); + vnode->lock_key = NULL; _leave(""); } diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h index 07bc10f076aa..d443e2bfa094 100644 --- a/fs/afs/protocol_yfs.h +++ b/fs/afs/protocol_yfs.h @@ -161,3 +161,14 @@ struct yfs_xdr_YFSStoreVolumeStatus { struct yfs_xdr_u64 max_quota; struct yfs_xdr_u64 file_quota; } __packed; + +enum yfs_lock_type { + yfs_LockNone = -1, + yfs_LockRead = 0, + yfs_LockWrite = 1, + yfs_LockExtend = 2, + yfs_LockRelease = 3, + yfs_LockMandatoryRead = 0x100, + yfs_LockMandatoryWrite = 0x101, + yfs_LockMandatoryExtend = 0x102, +}; diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index a7b44863d502..2c588f9bbbda 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -23,6 +23,7 @@ struct workqueue_struct *afs_async_calls; static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); +static void afs_delete_async_call(struct work_struct *); static void afs_process_async_call(struct work_struct *); static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); @@ -203,20 +204,26 @@ void afs_put_call(struct afs_call *call) } } +static struct afs_call *afs_get_call(struct afs_call *call, + enum afs_call_trace why) +{ + int u = atomic_inc_return(&call->usage); + + trace_afs_call(call, why, u, + atomic_read(&call->net->nr_outstanding_calls), + __builtin_return_address(0)); + return call; +} + /* * Queue the call for actual work. */ static void afs_queue_call_work(struct afs_call *call) { if (call->type->work) { - int u = atomic_inc_return(&call->usage); - - trace_afs_call(call, afs_call_trace_work, u, - atomic_read(&call->net->nr_outstanding_calls), - __builtin_return_address(0)); - INIT_WORK(&call->work, call->type->work); + afs_get_call(call, afs_call_trace_work); if (!queue_work(afs_wq, &call->work)) afs_put_call(call); } @@ -398,6 +405,12 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, } } + /* If the call is going to be asynchronous, we need an extra ref for + * the call to hold itself so the caller need not hang on to its ref. + */ + if (call->async) + afs_get_call(call, afs_call_trace_get); + /* create a call */ rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key, (unsigned long)call, @@ -438,15 +451,17 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, goto error_do_abort; } - /* at this point, an async call may no longer exist as it may have - * already completed */ - if (call->async) + /* Note that at this point, we may have received the reply or an abort + * - and an asynchronous call may already have completed. + */ + if (call->async) { + afs_put_call(call); return -EINPROGRESS; + } return afs_wait_for_call_to_complete(call, ac); error_do_abort: - call->state = AFS_CALL_COMPLETE; if (ret != -ECONNABORTED) { rxrpc_kernel_abort_call(call->net->socket, rxcall, RX_USER_ABORT, ret, "KSD"); @@ -463,8 +478,24 @@ error_do_abort: error_kill_call: if (call->type->done) call->type->done(call); - afs_put_call(call); + + /* We need to dispose of the extra ref we grabbed for an async call. + * The call, however, might be queued on afs_async_calls and we need to + * make sure we don't get any more notifications that might requeue it. + */ + if (call->rxcall) { + rxrpc_kernel_end_call(call->net->socket, call->rxcall); + call->rxcall = NULL; + } + if (call->async) { + if (cancel_work_sync(&call->async_work)) + afs_put_call(call); + afs_put_call(call); + } + ac->error = ret; + call->state = AFS_CALL_COMPLETE; + afs_put_call(call); _leave(" = %d", ret); return ret; } diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 95d0761cdb34..155dc14caef9 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -42,9 +42,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, if (vldb->fs_mask[i] & type_mask) nr_servers++; - slist = kzalloc(sizeof(struct afs_server_list) + - sizeof(struct afs_server_entry) * nr_servers, - GFP_KERNEL); + slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL); if (!slist) goto error; diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 12658c1363ae..5aa57929e8c2 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -803,7 +803,7 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc, bp = xdr_encode_YFSFid(bp, &vnode->fid); bp = xdr_encode_string(bp, name, namesz); bp = xdr_encode_YFSStoreStatus_mode(bp, mode); - bp = xdr_encode_u32(bp, 0); /* ViceLockType */ + bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */ yfs_check_req(call, bp); afs_use_fs_server(call, fc->cbi); diff --git a/fs/block_dev.c b/fs/block_dev.c index c546cdce77e6..58a4c1217fa8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -104,6 +104,20 @@ void invalidate_bdev(struct block_device *bdev) } EXPORT_SYMBOL(invalidate_bdev); +static void set_init_blocksize(struct block_device *bdev) +{ + unsigned bsize = bdev_logical_block_size(bdev); + loff_t size = i_size_read(bdev->bd_inode); + + while (bsize < PAGE_SIZE) { + if (size & bsize) + break; + bsize <<= 1; + } + bdev->bd_block_size = bsize; + bdev->bd_inode->i_blkbits = blksize_bits(bsize); +} + int set_blocksize(struct block_device *bdev, int size) { /* Size must be a power of two, and between 512 and PAGE_SIZE */ @@ -1431,18 +1445,9 @@ EXPORT_SYMBOL(check_disk_change); void bd_set_size(struct block_device *bdev, loff_t size) { - unsigned bsize = bdev_logical_block_size(bdev); - inode_lock(bdev->bd_inode); i_size_write(bdev->bd_inode, size); inode_unlock(bdev->bd_inode); - while (bsize < PAGE_SIZE) { - if (size & bsize) - break; - bsize <<= 1; - } - bdev->bd_block_size = bsize; - bdev->bd_inode->i_blkbits = blksize_bits(bsize); } EXPORT_SYMBOL(bd_set_size); @@ -1519,8 +1524,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } } - if (!ret) + if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); + set_init_blocksize(bdev); + } /* * If the device is invalidated, rescan partition @@ -1555,6 +1562,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); + set_init_blocksize(bdev); } if (bdev->bd_bdi == &noop_backing_dev_info) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0a68cf7032f5..7a2a2621f0d9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -35,6 +35,7 @@ struct btrfs_trans_handle; struct btrfs_transaction; struct btrfs_pending_snapshot; +struct btrfs_delayed_ref_root; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; @@ -786,6 +787,9 @@ enum { * main phase. The fs_info::balance_ctl is initialized. */ BTRFS_FS_BALANCE_RUNNING, + + /* Indicate that the cleaner thread is awake and doing something. */ + BTRFS_FS_CLEANER_RUNNING, }; struct btrfs_fs_info { @@ -2661,6 +2665,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count); int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info, unsigned long count, u64 transid, int wait); +void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head); int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8da2f380d3c0..6a2a2a951705 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1682,6 +1682,8 @@ static int cleaner_kthread(void *arg) while (1) { again = 0; + set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); + /* Make the cleaner go to sleep early. */ if (btrfs_need_cleaner_sleep(fs_info)) goto sleep; @@ -1728,6 +1730,7 @@ static int cleaner_kthread(void *arg) */ btrfs_delete_unused_bgs(fs_info); sleep: + clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); if (kthread_should_park()) kthread_parkme(); if (kthread_should_stop()) @@ -4201,6 +4204,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->ordered_root_lock); } spin_unlock(&fs_info->ordered_root_lock); + + /* + * We need this here because if we've been flipped read-only we won't + * get sync() from the umount, so we need to make sure any ordered + * extents that haven't had their dirty pages IO start writeout yet + * actually get run and error out properly. + */ + btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); } static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, @@ -4265,6 +4276,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, if (pin_bytes) btrfs_pin_extent(fs_info, head->bytenr, head->num_bytes, 1); + btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); btrfs_put_delayed_ref_head(head); cond_resched(); spin_lock(&delayed_refs->lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b15afeae16df..d81035b7ea7d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2456,12 +2456,10 @@ static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans, return ret ? ret : 1; } -static void cleanup_ref_head_accounting(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_head *head) +void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head) { - struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_delayed_ref_root *delayed_refs = - &trans->transaction->delayed_refs; int nr_items = 1; /* Dropping this ref head update. */ if (head->total_ref_mod < 0) { @@ -2544,7 +2542,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, } } - cleanup_ref_head_accounting(trans, head); + btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); trace_run_delayed_ref_head(fs_info, head, 0); btrfs_delayed_ref_unlock(head); @@ -4954,6 +4952,15 @@ static void flush_space(struct btrfs_fs_info *fs_info, ret = 0; break; case COMMIT_TRANS: + /* + * If we have pending delayed iputs then we could free up a + * bunch of pinned space, so make sure we run the iputs before + * we do our pinned bytes check below. + */ + mutex_lock(&fs_info->cleaner_delayed_iput_mutex); + btrfs_run_delayed_iputs(fs_info); + mutex_unlock(&fs_info->cleaner_delayed_iput_mutex); + ret = may_commit_transaction(fs_info, space_info); break; default: @@ -7188,7 +7195,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, if (head->must_insert_reserved) ret = 1; - cleanup_ref_head_accounting(trans, head); + btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head); mutex_unlock(&head->mutex); btrfs_put_delayed_ref_head(head); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 43eb4535319d..5c349667c761 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3129,9 +3129,6 @@ out: /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); - /* Try to release some metadata so we don't get an OOM but don't wait */ - btrfs_btree_balance_dirty_nodelay(fs_info); - return ret; } @@ -3254,6 +3251,8 @@ void btrfs_add_delayed_iput(struct inode *inode) ASSERT(list_empty(&binode->delayed_iput)); list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs); spin_unlock(&fs_info->delayed_iput_lock); + if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags)) + wake_up_process(fs_info->cleaner_kthread); } void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 94c026bba2c2..bba28a5034ba 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1035,6 +1035,8 @@ static void drop_inode_snap_realm(struct ceph_inode_info *ci) list_del_init(&ci->i_snap_realm_item); ci->i_snap_realm_counter++; ci->i_snap_realm = NULL; + if (realm->ino == ci->i_vino.ino) + realm->inode = NULL; spin_unlock(&realm->inodes_with_caps_lock); ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc, realm); diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index 03f4d24db8fe..9455d3aef0c3 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -3,19 +3,6 @@ * quota.c - CephFS quota * * Copyright (C) 2017-2018 SUSE - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . */ #include diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 593fb422d0f3..e92a2fee3c57 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -252,6 +252,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, ",ACL"); #endif seq_putc(m, '\n'); + seq_printf(m, "CIFSMaxBufSize: %d\n", CIFSMaxBufSize); seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); seq_printf(m, "Servers:"); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index e18915415e13..bb54ccf8481c 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1549,18 +1549,26 @@ cifs_discard_remaining_data(struct TCP_Server_Info *server) } static int -cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +__cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid, + bool malformed) { int length; - struct cifs_readdata *rdata = mid->callback_data; length = cifs_discard_remaining_data(server); - dequeue_mid(mid, rdata->result); + dequeue_mid(mid, malformed); mid->resp_buf = server->smallbuf; server->smallbuf = NULL; return length; } +static int +cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + struct cifs_readdata *rdata = mid->callback_data; + + return __cifs_readv_discard(server, mid, rdata->result); +} + int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) { @@ -1602,12 +1610,23 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return -1; } + /* set up first two iov for signature check and to get credits */ + rdata->iov[0].iov_base = buf; + rdata->iov[0].iov_len = 4; + rdata->iov[1].iov_base = buf + 4; + rdata->iov[1].iov_len = server->total_read - 4; + cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", + rdata->iov[0].iov_base, rdata->iov[0].iov_len); + cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n", + rdata->iov[1].iov_base, rdata->iov[1].iov_len); + /* Was the SMB read successful? */ rdata->result = server->ops->map_error(buf, false); if (rdata->result != 0) { cifs_dbg(FYI, "%s: server returned error %d\n", __func__, rdata->result); - return cifs_readv_discard(server, mid); + /* normal error on read response */ + return __cifs_readv_discard(server, mid, false); } /* Is there enough to get to the rest of the READ_RSP header? */ @@ -1651,14 +1670,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) server->total_read += length; } - /* set up first iov for signature check */ - rdata->iov[0].iov_base = buf; - rdata->iov[0].iov_len = 4; - rdata->iov[1].iov_base = buf + 4; - rdata->iov[1].iov_len = server->total_read - 4; - cifs_dbg(FYI, "0: iov_base=%p iov_len=%u\n", - rdata->iov[0].iov_base, server->total_read); - /* how much data is in the response? */ #ifdef CONFIG_CIFS_SMB_DIRECT use_rdma_mr = rdata->mr; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 683310f26171..8463c940e0e5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -720,6 +720,21 @@ server_unresponsive(struct TCP_Server_Info *server) return false; } +static inline bool +zero_credits(struct TCP_Server_Info *server) +{ + int val; + + spin_lock(&server->req_lock); + val = server->credits + server->echo_credits + server->oplock_credits; + if (server->in_flight == 0 && val == 0) { + spin_unlock(&server->req_lock); + return true; + } + spin_unlock(&server->req_lock); + return false; +} + static int cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) { @@ -732,6 +747,12 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) for (total_read = 0; msg_data_left(smb_msg); total_read += length) { try_to_freeze(); + /* reconnect if no credits and no requests in flight */ + if (zero_credits(server)) { + cifs_reconnect(server); + return -ECONNABORTED; + } + if (server_unresponsive(server)) return -ECONNABORTED; if (cifs_rdma_enabled(server) && server->smbd_conn) diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index f14533da3a93..01a76bccdb8d 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -293,6 +293,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, int rc; struct smb2_file_all_info *smb2_data; __u32 create_options = 0; + struct cifs_fid fid; + bool no_cached_open = tcon->nohandlecache; *adjust_tz = false; *symlink = false; @@ -301,6 +303,21 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, GFP_KERNEL); if (smb2_data == NULL) return -ENOMEM; + + /* If it is a root and its handle is cached then use it */ + if (!strlen(full_path) && !no_cached_open) { + rc = open_shroot(xid, tcon, &fid); + if (rc) + goto out; + rc = SMB2_query_info(xid, tcon, fid.persistent_fid, + fid.volatile_fid, smb2_data); + close_shroot(&tcon->crfid); + if (rc) + goto out; + move_smb2_info_to_cifs(data, smb2_data); + goto out; + } + if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 6a9c47541c53..7b8b58fb4d3f 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -648,6 +648,13 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) if (rsp->sync_hdr.Command != SMB2_OPLOCK_BREAK) return false; + if (rsp->sync_hdr.CreditRequest) { + spin_lock(&server->req_lock); + server->credits += le16_to_cpu(rsp->sync_hdr.CreditRequest); + spin_unlock(&server->req_lock); + wake_up(&server->request_q); + } + if (rsp->StructureSize != smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { if (le16_to_cpu(rsp->StructureSize) == 44) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index cf7eb891804f..153238fc4fa9 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -34,6 +34,7 @@ #include "cifs_ioctl.h" #include "smbdirect.h" +/* Change credits for different ops and return the total number of credits */ static int change_conf(struct TCP_Server_Info *server) { @@ -41,17 +42,15 @@ change_conf(struct TCP_Server_Info *server) server->oplock_credits = server->echo_credits = 0; switch (server->credits) { case 0: - return -1; + return 0; case 1: server->echoes = false; server->oplocks = false; - cifs_dbg(VFS, "disabling echoes and oplocks\n"); break; case 2: server->echoes = true; server->oplocks = false; server->echo_credits = 1; - cifs_dbg(FYI, "disabling oplocks\n"); break; default: server->echoes = true; @@ -64,14 +63,15 @@ change_conf(struct TCP_Server_Info *server) server->echo_credits = 1; } server->credits -= server->echo_credits + server->oplock_credits; - return 0; + return server->credits + server->echo_credits + server->oplock_credits; } static void smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, const int optype) { - int *val, rc = 0; + int *val, rc = -1; + spin_lock(&server->req_lock); val = server->ops->get_credits_field(server, optype); @@ -101,8 +101,26 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, } spin_unlock(&server->req_lock); wake_up(&server->request_q); - if (rc) - cifs_reconnect(server); + + if (server->tcpStatus == CifsNeedReconnect) + return; + + switch (rc) { + case -1: + /* change_conf hasn't been executed */ + break; + case 0: + cifs_dbg(VFS, "Possible client or server bug - zero credits\n"); + break; + case 1: + cifs_dbg(VFS, "disabling echoes and oplocks\n"); + break; + case 2: + cifs_dbg(FYI, "disabling oplocks\n"); + break; + default: + cifs_dbg(FYI, "add %u credits total=%d\n", add, rc); + } } static void @@ -136,7 +154,11 @@ smb2_get_credits(struct mid_q_entry *mid) { struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)mid->resp_buf; - return le16_to_cpu(shdr->CreditRequest); + if (mid->mid_state == MID_RESPONSE_RECEIVED + || mid->mid_state == MID_RESPONSE_MALFORMED) + return le16_to_cpu(shdr->CreditRequest); + + return 0; } static int @@ -165,14 +187,14 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, scredits = server->credits; /* can deadlock with reopen */ - if (scredits == 1) { + if (scredits <= 8) { *num = SMB2_MAX_BUFFER_SIZE; *credits = 0; break; } - /* leave one credit for a possible reopen */ - scredits--; + /* leave some credits for reopen and other ops */ + scredits -= 8; *num = min_t(unsigned int, size, scredits * SMB2_MAX_BUFFER_SIZE); @@ -3189,11 +3211,23 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, server->ops->is_status_pending(buf, server, 0)) return -1; - rdata->result = server->ops->map_error(buf, false); + /* set up first two iov to get credits */ + rdata->iov[0].iov_base = buf; + rdata->iov[0].iov_len = 4; + rdata->iov[1].iov_base = buf + 4; + rdata->iov[1].iov_len = + min_t(unsigned int, buf_len, server->vals->read_rsp_size) - 4; + cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", + rdata->iov[0].iov_base, rdata->iov[0].iov_len); + cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n", + rdata->iov[1].iov_base, rdata->iov[1].iov_len); + + rdata->result = server->ops->map_error(buf, true); if (rdata->result != 0) { cifs_dbg(FYI, "%s: server returned error %d\n", __func__, rdata->result); - dequeue_mid(mid, rdata->result); + /* normal error on read response */ + dequeue_mid(mid, false); return 0; } @@ -3266,14 +3300,6 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, return 0; } - /* set up first iov for signature check */ - rdata->iov[0].iov_base = buf; - rdata->iov[0].iov_len = 4; - rdata->iov[1].iov_base = buf + 4; - rdata->iov[1].iov_len = server->vals->read_rsp_size - 4; - cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n", - rdata->iov[0].iov_base, server->vals->read_rsp_size); - length = rdata->copy_into_pages(server, rdata, &iter); kfree(bvec); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 50811a7dc0e0..2ff209ec4fab 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2816,6 +2816,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, int resp_buftype = CIFS_NO_BUFFER; struct cifs_ses *ses = tcon->ses; int flags = 0; + bool allocated = false; cifs_dbg(FYI, "Query Info\n"); @@ -2855,14 +2856,21 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon, "Error %d allocating memory for acl\n", rc); *dlen = 0; + rc = -ENOMEM; goto qinf_exit; } + allocated = true; } } rc = smb2_validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset), le32_to_cpu(rsp->OutputBufferLength), &rsp_iov, min_len, *data); + if (rc && allocated) { + kfree(*data); + *data = NULL; + *dlen = 0; + } qinf_exit: SMB2_query_info_free(&rqst); @@ -2916,9 +2924,10 @@ smb2_echo_callback(struct mid_q_entry *mid) { struct TCP_Server_Info *server = mid->callback_data; struct smb2_echo_rsp *rsp = (struct smb2_echo_rsp *)mid->resp_buf; - unsigned int credits_received = 1; + unsigned int credits_received = 0; - if (mid->mid_state == MID_RESPONSE_RECEIVED) + if (mid->mid_state == MID_RESPONSE_RECEIVED + || mid->mid_state == MID_RESPONSE_MALFORMED) credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest); DeleteMidQEntry(mid); @@ -3175,7 +3184,7 @@ smb2_readv_callback(struct mid_q_entry *mid) struct TCP_Server_Info *server = tcon->ses->server; struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)rdata->iov[0].iov_base; - unsigned int credits_received = 1; + unsigned int credits_received = 0; struct smb_rqst rqst = { .rq_iov = rdata->iov, .rq_nvec = 2, .rq_pages = rdata->pages, @@ -3214,6 +3223,9 @@ smb2_readv_callback(struct mid_q_entry *mid) task_io_account_read(rdata->got_bytes); cifs_stats_bytes_read(tcon, rdata->got_bytes); break; + case MID_RESPONSE_MALFORMED: + credits_received = le16_to_cpu(shdr->CreditRequest); + /* fall through */ default: if (rdata->result != -ENODATA) rdata->result = -EIO; @@ -3399,7 +3411,7 @@ smb2_writev_callback(struct mid_q_entry *mid) struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); unsigned int written; struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf; - unsigned int credits_received = 1; + unsigned int credits_received = 0; switch (mid->mid_state) { case MID_RESPONSE_RECEIVED: @@ -3427,6 +3439,9 @@ smb2_writev_callback(struct mid_q_entry *mid) case MID_RETRY_NEEDED: wdata->result = -EAGAIN; break; + case MID_RESPONSE_MALFORMED: + credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest); + /* fall through */ default: wdata->result = -EIO; break; diff --git a/fs/cifs/trace.c b/fs/cifs/trace.c index bd4a546feec1..465483787193 100644 --- a/fs/cifs/trace.c +++ b/fs/cifs/trace.c @@ -3,16 +3,6 @@ * Copyright (C) 2018, Microsoft Corporation. * * Author(s): Steve French - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. */ #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index fb049809555f..59be48206932 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -3,16 +3,6 @@ * Copyright (C) 2018, Microsoft Corporation. * * Author(s): Steve French - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cifs diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 202e0e84efdd..53532bd3f50d 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -786,17 +786,8 @@ static void cifs_compound_callback(struct mid_q_entry *mid) { struct TCP_Server_Info *server = mid->server; - unsigned int optype = mid->optype; - unsigned int credits_received = 0; - if (mid->mid_state == MID_RESPONSE_RECEIVED) { - if (mid->resp_buf) - credits_received = server->ops->get_credits(mid); - else - cifs_dbg(FYI, "Bad state for cancelled MID\n"); - } - - add_credits(server, credits_received, optype); + add_credits(server, server->ops->get_credits(mid), mid->optype); } static void diff --git a/fs/direct-io.c b/fs/direct-io.c index dbc1a1f080ce..ec2fb6fe6d37 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -679,6 +679,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, unsigned long fs_count; /* Number of filesystem-sized blocks */ int create; unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor; + loff_t i_size; /* * If there was a memory error and we've overwritten all the @@ -708,8 +709,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, */ create = dio->op == REQ_OP_WRITE; if (dio->flags & DIO_SKIP_HOLES) { - if (fs_startblk <= ((i_size_read(dio->inode) - 1) >> - i_blkbits)) + i_size = i_size_read(dio->inode); + if (i_size && fs_startblk <= (i_size - 1) >> i_blkbits) create = 0; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index b40168fcc94a..36855c1f8daf 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -331,11 +331,22 @@ struct inode_switch_wbs_context { struct work_struct work; }; +static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) +{ + down_write(&bdi->wb_switch_rwsem); +} + +static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) +{ + up_write(&bdi->wb_switch_rwsem); +} + static void inode_switch_wbs_work_fn(struct work_struct *work) { struct inode_switch_wbs_context *isw = container_of(work, struct inode_switch_wbs_context, work); struct inode *inode = isw->inode; + struct backing_dev_info *bdi = inode_to_bdi(inode); struct address_space *mapping = inode->i_mapping; struct bdi_writeback *old_wb = inode->i_wb; struct bdi_writeback *new_wb = isw->new_wb; @@ -343,6 +354,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) struct page *page; bool switched = false; + /* + * If @inode switches cgwb membership while sync_inodes_sb() is + * being issued, sync_inodes_sb() might miss it. Synchronize. + */ + down_read(&bdi->wb_switch_rwsem); + /* * By the time control reaches here, RCU grace period has passed * since I_WB_SWITCH assertion and all wb stat update transactions @@ -428,6 +445,8 @@ skip_switch: spin_unlock(&new_wb->list_lock); spin_unlock(&old_wb->list_lock); + up_read(&bdi->wb_switch_rwsem); + if (switched) { wb_wakeup(new_wb); wb_put(old_wb); @@ -468,9 +487,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) if (inode->i_state & I_WB_SWITCH) return; + /* + * Avoid starting new switches while sync_inodes_sb() is in + * progress. Otherwise, if the down_write protected issue path + * blocks heavily, we might end up starting a large number of + * switches which will block on the rwsem. + */ + if (!down_read_trylock(&bdi->wb_switch_rwsem)) + return; + isw = kzalloc(sizeof(*isw), GFP_ATOMIC); if (!isw) - return; + goto out_unlock; /* find and pin the new wb */ rcu_read_lock(); @@ -504,12 +532,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) * Let's continue after I_WB_SWITCH is guaranteed to be visible. */ call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); - return; + goto out_unlock; out_free: if (isw->new_wb) wb_put(isw->new_wb); kfree(isw); +out_unlock: + up_read(&bdi->wb_switch_rwsem); } /** @@ -887,6 +917,9 @@ fs_initcall(cgroup_writeback_init); #else /* CONFIG_CGROUP_WRITEBACK */ +static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } +static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } + static struct bdi_writeback * locked_inode_to_wb_and_lock_list(struct inode *inode) __releases(&inode->i_lock) @@ -2413,8 +2446,11 @@ void sync_inodes_sb(struct super_block *sb) return; WARN_ON(!rwsem_is_locked(&sb->s_umount)); + /* protect against inode wb switch, see inode_switch_wbs_work_fn() */ + bdi_down_write_wb_switch_rwsem(bdi); bdi_split_work_to_wbs(bdi, &work, false); wb_wait_for_completion(bdi, &done); + bdi_up_write_wb_switch_rwsem(bdi); wait_sb_inodes(sb); } diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 46d691ba04bc..45b2322e092d 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -133,15 +133,9 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t count, unsigned int flags) { - ssize_t ret; - if (file_inode(file_in) == file_inode(file_out)) return -EINVAL; -retry: - ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); - if (ret == -EAGAIN) - goto retry; - return ret; + return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); } static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 105576daca4a..798f1253141a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -724,8 +724,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, return -EBADF; /* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */ - if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) - return -EINVAL; + if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) { + ret = -EINVAL; + goto fput_and_out; + } /* verify that this is indeed an inotify instance */ if (unlikely(f.file->f_op != &inotify_fops)) { diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 96f7d32cd184..898c8321b343 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -128,7 +128,6 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id, struct pstore_record *record) { struct persistent_ram_zone *prz; - bool update = (record->type == PSTORE_TYPE_DMESG); /* Give up if we never existed or have hit the end. */ if (!przs) @@ -139,7 +138,7 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id, return NULL; /* Update old/shadowed buffer. */ - if (update) + if (prz->type == PSTORE_TYPE_DMESG) persistent_ram_save_old(prz); if (!persistent_ram_old_size(prz)) @@ -711,18 +710,15 @@ static int ramoops_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ramoops_platform_data *pdata = dev->platform_data; + struct ramoops_platform_data pdata_local; struct ramoops_context *cxt = &oops_cxt; size_t dump_mem_sz; phys_addr_t paddr; int err = -EINVAL; if (dev_of_node(dev) && !pdata) { - pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) { - pr_err("cannot allocate platform data buffer\n"); - err = -ENOMEM; - goto fail_out; - } + pdata = &pdata_local; + memset(pdata, 0, sizeof(*pdata)); err = ramoops_parse_dt(pdev, pdata); if (err < 0) diff --git a/include/dt-bindings/reset/amlogic,meson-axg-reset.h b/include/dt-bindings/reset/amlogic,meson-axg-reset.h index ad6f55dabd6d..0f2e0fe45ca4 100644 --- a/include/dt-bindings/reset/amlogic,meson-axg-reset.h +++ b/include/dt-bindings/reset/amlogic,meson-axg-reset.h @@ -1,12 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * * Copyright (c) 2016 BayLibre, SAS. * Author: Neil Armstrong * * Copyright (c) 2017 Amlogic, inc. * Author: Yixun Lan * - * SPDX-License-Identifier: (GPL-2.0+ OR BSD) */ #ifndef _DT_BINDINGS_AMLOGIC_MESON_AXG_RESET_H diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index c31157135598..07e02d6df5ad 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -190,6 +190,7 @@ struct backing_dev_info { struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct rb_root cgwb_congested_tree; /* their congested states */ struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */ + struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */ #else struct bdi_writeback_congested *wb_congested; #endif diff --git a/include/linux/bcma/bcma_soc.h b/include/linux/bcma/bcma_soc.h index 7cca5f859a90..f3c43519baa7 100644 --- a/include/linux/bcma/bcma_soc.h +++ b/include/linux/bcma/bcma_soc.h @@ -6,6 +6,7 @@ struct bcma_soc { struct bcma_bus bus; + struct device *dev; }; int __init bcma_host_soc_register(struct bcma_soc *soc); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 5c7e7f859a24..d66bf5f32610 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -287,7 +287,7 @@ enum req_opf { REQ_OP_DISCARD = 3, /* securely erase sectors */ REQ_OP_SECURE_ERASE = 5, - /* seset a zone write pointer */ + /* reset a zone write pointer */ REQ_OP_ZONE_RESET = 6, /* write the same sector many times */ REQ_OP_WRITE_SAME = 7, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e734f163bd0b..0394f1f9213b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -268,9 +268,15 @@ struct bpf_verifier_ops { }; struct bpf_prog_offload_ops { + /* verifier basic callbacks */ int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int (*finalize)(struct bpf_verifier_env *env); + /* verifier optimization callbacks (called after .finalize) */ + int (*replace_insn)(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn); + int (*remove_insns)(struct bpf_verifier_env *env, u32 off, u32 cnt); + /* program management callbacks */ int (*prepare)(struct bpf_prog *prog); int (*translate)(struct bpf_prog *prog); void (*destroy)(struct bpf_prog *prog); @@ -283,6 +289,7 @@ struct bpf_prog_offload { void *dev_priv; struct list_head offloads; bool dev_state; + bool opt_failed; void *jited_image; u32 jited_len; }; @@ -397,6 +404,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); /* an array of programs to be executed under rcu_lock. * diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 573cca00a0e6..0620e418dde5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -187,6 +187,7 @@ struct bpf_insn_aux_data { int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ u8 alu_state; /* used in combination with alu_limit */ + unsigned int orig_idx; /* original instruction index */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -265,5 +266,10 @@ int bpf_prog_offload_verifier_prep(struct bpf_prog *prog); int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); int bpf_prog_offload_finalize(struct bpf_verifier_env *env); +void +bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn); +void +bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 39f668d5066b..333a6695a918 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -3,9 +3,8 @@ #error "Please don't include directly, include instead." #endif -/* Some compiler specific definitions are overwritten here - * for Clang compiler - */ +/* Compiler specific definitions for Clang compiler */ + #define uninitialized_var(x) x = *(&(x)) /* same as gcc, this was present in clang-2.6 so we can assume it works diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index dd8268f5f5f0..e8579412ad21 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -58,10 +58,6 @@ (typeof(ptr)) (__ptr + (off)); \ }) -/* Make the optimizer believe the variable can be manipulated arbitrarily. */ -#define OPTIMIZER_HIDE_VAR(var) \ - __asm__ ("" : "=r" (var) : "0" (var)) - /* * A trick to suppress uninitialized variable warning without generating any * code diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index 517bd14e1222..b17f3cd18334 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -5,9 +5,7 @@ #ifdef __ECC -/* Some compiler specific definitions are overwritten here - * for Intel ECC compiler - */ +/* Compiler specific definitions for Intel ECC compiler */ #include diff --git a/include/linux/compiler.h b/include/linux/compiler.h index fc5004a4b07d..445348facea9 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -161,7 +161,9 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif #ifndef OPTIMIZER_HIDE_VAR -#define OPTIMIZER_HIDE_VAR(var) barrier() +/* Make the optimizer believe the variable can be manipulated arbitrarily. */ +#define OPTIMIZER_HIDE_VAR(var) \ + __asm__ ("" : "=r" (var) : "0" (var)) #endif /* Not-quite-unique ID. */ diff --git a/include/linux/fb.h b/include/linux/fb.h index 7cdd31a69719..f52ef0ad6781 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -653,6 +653,7 @@ extern int fb_new_modelist(struct fb_info *info); extern struct fb_info *registered_fb[FB_MAX]; extern int num_registered_fb; +extern bool fb_center_logo; extern struct class *fb_class; #define for_each_registered_fb(i) \ diff --git a/include/linux/filter.h b/include/linux/filter.h index ad106d845b22..e4b473f85b46 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -277,6 +277,26 @@ struct sock_reuseport; .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Unconditional jumps, goto pc + off16 */ #define BPF_JMP_A(OFF) \ @@ -778,6 +798,7 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); void bpf_clear_redirect_map(struct bpf_map *map); diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h index c1f003aadcce..94e9797e434c 100644 --- a/include/linux/fsl/ptp_qoriq.h +++ b/include/linux/fsl/ptp_qoriq.h @@ -120,6 +120,8 @@ struct qoriq_ptp_registers { /* Bit definitions for the TMR_STAT register */ #define STAT_VEC_SHIFT (0) /* Timer general purpose status vector */ #define STAT_VEC_MASK (0x3f) +#define ETS1_VLD (1<<24) +#define ETS2_VLD (1<<25) /* Bit definitions for the TMR_PRSC register */ #define PRSC_OCK_SHIFT (0) /* Output clock division/prescale factor. */ @@ -141,6 +143,9 @@ struct qoriq_ptp { struct ptp_clock *clock; struct ptp_clock_info caps; struct resource *rsrc; + struct dentry *debugfs_root; + struct device *dev; + bool extts_fifo_support; int irq; int phc_index; u64 alarm_interval; /* for periodic alarm */ @@ -166,4 +171,14 @@ static inline void qoriq_write(unsigned __iomem *addr, u32 val) iowrite32be(val, addr); } +#ifdef CONFIG_DEBUG_FS +void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp); +void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp); +#else +static inline void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp) +{ } +static inline void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp) +{ } +#endif + #endif diff --git a/include/linux/hid.h b/include/linux/hid.h index d99287327ef2..f9707d1dcb58 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -430,7 +430,7 @@ struct hid_local { */ struct hid_collection { - struct hid_collection *parent; + int parent_idx; /* device->collection */ unsigned type; unsigned usage; unsigned level; @@ -658,7 +658,6 @@ struct hid_parser { unsigned int *collection_stack; unsigned int collection_stack_ptr; unsigned int collection_stack_size; - struct hid_collection *active_collection; struct hid_device *device; unsigned int scan_flags; }; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f0885cc01db6..dcb6977afce9 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1159,8 +1159,9 @@ struct hv_ring_buffer_debug_info { u32 bytes_avail_towrite; }; -void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, - struct hv_ring_buffer_debug_info *debug_info); + +int hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info); /* Vmbus interface */ #define vmbus_driver_register(driver) \ diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 6756fea18b69..e44746de95cd 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -54,6 +54,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_IPGRE: case ARPHRD_VOID: case ARPHRD_NONE: + case ARPHRD_RAWIP: return false; default: return true; diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 119f53941c12..8b4348f69bc5 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -106,6 +107,14 @@ struct ip_mc_list { #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value) #define IGMPV3_MRC(value) IGMPV3_EXP(0x80, 4, 3, value) +static inline int ip_mc_may_pull(struct sk_buff *skb, unsigned int len) +{ + if (skb_transport_offset(skb) + ip_transport_len(skb) < len) + return -EINVAL; + + return pskb_may_pull(skb, len); +} + extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u8 proto); extern int igmp_rcv(struct sk_buff *); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); @@ -130,6 +139,6 @@ extern void ip_mc_unmap(struct in_device *); extern void ip_mc_remap(struct in_device *); extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr); extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr); -int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed); +int ip_mc_check_igmp(struct sk_buff *skb); #endif diff --git a/include/linux/in.h b/include/linux/in.h index 31b493734763..435e7f2a513a 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -60,6 +60,11 @@ static inline bool ipv4_is_lbcast(__be32 addr) return addr == htonl(INADDR_BROADCAST); } +static inline bool ipv4_is_all_snoopers(__be32 addr) +{ + return addr == htonl(INADDR_ALLSNOOPERS_GROUP); +} + static inline bool ipv4_is_zeronet(__be32 addr) { return (addr & htonl(0xff000000)) == htonl(0x00000000); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c672f34235e7..4a728dba02e2 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -260,6 +260,7 @@ struct irq_affinity { /** * struct irq_affinity_desc - Interrupt affinity descriptor * @mask: cpumask to hold the affinity assignment + * @is_managed: 1 if the interrupt is managed internally */ struct irq_affinity_desc { struct cpumask mask; diff --git a/include/linux/ip.h b/include/linux/ip.h index 492bc6513533..482b7b7c9f30 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -34,4 +34,9 @@ static inline struct iphdr *ipip_hdr(const struct sk_buff *skb) { return (struct iphdr *)skb_transport_header(skb); } + +static inline unsigned int ip_transport_len(const struct sk_buff *skb) +{ + return ntohs(ip_hdr(skb)->tot_len) - skb_network_header_len(skb); +} #endif /* _LINUX_IP_H */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 495e834c1367..6d45ce784bea 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -104,6 +104,12 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) return (struct ipv6hdr *)skb_transport_header(skb); } +static inline unsigned int ipv6_transport_len(const struct sk_buff *skb) +{ + return ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr) - + skb_network_header_len(skb); +} + /* This structure contains results of exthdrs parsing as offsets from skb->nh. diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 5440f11b0907..ad609617aeb8 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -160,6 +160,7 @@ static inline struct nd_blk_region_desc *to_blk_region_desc( } enum nvdimm_security_state { + NVDIMM_SECURITY_ERROR = -1, NVDIMM_SECURITY_DISABLED, NVDIMM_SECURITY_UNLOCKED, NVDIMM_SECURITY_LOCKED, @@ -234,7 +235,6 @@ static inline struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, cmd_mask, num_flush, flush_wpq, NULL, NULL); } -int nvdimm_security_setup_events(struct nvdimm *nvdimm); const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a57b9a853aab..e675ef97a426 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -630,6 +630,7 @@ struct netdev_queue { } ____cacheline_aligned_in_smp; extern int sysctl_fb_tunnels_only_for_init_net; +extern int sysctl_devconf_inherit_init_net; static inline bool net_has_fallback_tunnels(const struct net *net) { diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index 6989e2e4eabf..25f9a770fb84 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -19,27 +19,18 @@ struct nf_conn; struct nf_ct_gre_keymap { struct list_head list; struct nf_conntrack_tuple tuple; -}; - -enum grep_conntrack { - GRE_CT_UNREPLIED, - GRE_CT_REPLIED, - GRE_CT_MAX -}; - -struct netns_proto_gre { - struct nf_proto_net nf; - rwlock_t keymap_lock; - struct list_head keymap_list; - unsigned int gre_timeouts[GRE_CT_MAX]; + struct rcu_head rcu; }; /* add new tuple->key_reply pair to keymap */ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conntrack_tuple *t); +void nf_ct_gre_keymap_flush(struct net *net); /* delete keymap entries */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct); +bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple); #endif /* __KERNEL__ */ #endif /* _CONNTRACK_PROTO_GRE_H */ diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 95ab5cc64422..082e2c41b7ff 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -25,7 +25,6 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict); -int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry); #else static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol) @@ -37,11 +36,6 @@ static inline int nf_ip_route(struct net *net, struct dst_entry **dst, { return -EOPNOTSUPP; } -static inline int nf_ip_reroute(struct sk_buff *skb, - const struct nf_queue_entry *entry) -{ - return -EOPNOTSUPP; -} #endif /* CONFIG_INET */ #endif /*__LINUX_IP_NETFILTER_H*/ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 4e8add270200..593d1b9c33a8 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -126,6 +126,7 @@ void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, const struct netlink_ext_ack *extack); int netlink_has_listeners(struct sock *sk, unsigned int group); +bool netlink_strict_get_check(struct sk_buff *skb); int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, diff --git a/include/linux/of.h b/include/linux/of.h index fe472e5195a9..e240992e5cb6 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -50,7 +50,6 @@ struct of_irq_controller; struct device_node { const char *name; - const char *type; phandle phandle; const char *full_name; struct fwnode_handle fwnode; diff --git a/include/linux/phy.h b/include/linux/phy.h index 0990f913d649..70f83d0d7469 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -48,6 +48,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; #define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features) @@ -56,6 +57,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_ini #define PHY_GBIT_FIBRE_FEATURES ((unsigned long *)&phy_gbit_fibre_features) #define PHY_GBIT_ALL_PORTS_FEATURES ((unsigned long *)&phy_gbit_all_ports_features) #define PHY_10GBIT_FEATURES ((unsigned long *)&phy_10gbit_features) +#define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features) #define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) extern const int phy_10_100_features_array[4]; @@ -461,8 +463,8 @@ struct phy_device { * only works for PHYs with IDs which match this field * name: The friendly name of this PHY type * phy_id_mask: Defines the important bits of the phy_id - * features: A list of features (speed, duplex, etc) supported - * by this PHY + * features: A mandatory list of features (speed, duplex, etc) + * supported by this PHY * flags: A bitfield defining certain other features this PHY * supports (like interrupts) * @@ -1045,7 +1047,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, int phy_ethtool_ksettings_set(struct phy_device *phydev, const struct ethtool_link_ksettings *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); -int phy_start_interrupts(struct phy_device *phydev); +void phy_request_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 5f818fda96bd..35170f74ed80 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -763,6 +763,7 @@ struct qed_probe_params { u32 dp_module; u8 dp_level; bool is_vf; + bool recov_in_prog; }; #define QED_DRV_VER_STR_SIZE 12 @@ -809,6 +810,7 @@ struct qed_common_cb_ops { void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc); void (*link_update)(void *dev, struct qed_link_output *link); + void (*schedule_recovery_handler)(void *dev); void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type); void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data); void (*get_protocol_tlv_data)(void *dev, void *data); @@ -1056,6 +1058,24 @@ struct qed_common_ops { int (*db_recovery_del)(struct qed_dev *cdev, void __iomem *db_addr, void *db_data); +/** + * @brief recovery_process - Trigger a recovery process + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*recovery_process)(struct qed_dev *cdev); + +/** + * @brief recovery_prolog - Execute the prolog operations of a recovery process + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*recovery_prolog)(struct qed_dev *cdev); + /** * @brief update_drv_state - API to inform the change in the driver state. * diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h index 9904617a9730..5a00c7a473bf 100644 --- a/include/linux/qed/qede_rdma.h +++ b/include/linux/qed/qede_rdma.h @@ -74,21 +74,23 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv); bool qede_rdma_supported(struct qede_dev *dev); #if IS_ENABLED(CONFIG_QED_RDMA) -int qede_rdma_dev_add(struct qede_dev *dev); +int qede_rdma_dev_add(struct qede_dev *dev, bool recovery); void qede_rdma_dev_event_open(struct qede_dev *dev); void qede_rdma_dev_event_close(struct qede_dev *dev); -void qede_rdma_dev_remove(struct qede_dev *dev); +void qede_rdma_dev_remove(struct qede_dev *dev, bool recovery); void qede_rdma_event_changeaddr(struct qede_dev *edr); #else -static inline int qede_rdma_dev_add(struct qede_dev *dev) +static inline int qede_rdma_dev_add(struct qede_dev *dev, + bool recovery) { return 0; } static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {} static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {} -static inline void qede_rdma_dev_remove(struct qede_dev *dev) {} +static inline void qede_rdma_dev_remove(struct qede_dev *dev, + bool recovery) {} static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {} #endif #endif diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index 10b19a192b2d..545f37138057 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h @@ -24,9 +24,13 @@ * called near the end of a function. Otherwise, the list can be * re-initialized for later re-use by wake_q_init(). * - * Note that this can cause spurious wakeups. schedule() callers + * NOTE that this can cause spurious wakeups. schedule() callers * must ensure the call is done inside a loop, confirming that the * wakeup condition has in fact occurred. + * + * NOTE that there is no guarantee the wakeup will happen any later than the + * wake_q_add() location. Therefore task must be ready to be woken at the + * location of the wake_q_add(). */ #include diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 93f56fddd92a..c34595374e93 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1221,6 +1221,11 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) } #endif +struct bpf_flow_keys; +bool __skb_flow_bpf_dissect(struct bpf_prog *prog, + const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + struct bpf_flow_keys *flow_keys); bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, @@ -3218,6 +3223,7 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); * * This is exactly the same as pskb_trim except that it ensures the * checksum of received packets are still valid after the operation. + * It can change skb pointers. */ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 32baf8e26735..987b6491b946 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -12,6 +12,11 @@ struct irq_affinity; /** * virtio_config_ops - operations for configuring a virtio device + * Note: Do not assume that a transport implements all of the operations + * getting/setting a value as a simple read/write! Generally speaking, + * any of @get/@set, @get_status/@set_status, or @get_features/ + * @finalize_features are NOT safe to be called from an atomic + * context. * @get: read the value of a configuration field * vdev: the virtio_device * offset: the offset of the configuration field @@ -22,7 +27,7 @@ struct irq_affinity; * offset: the offset of the configuration field * buf: the buffer to read the field value from. * len: the length of the buffer - * @generation: config generation counter + * @generation: config generation counter (optional) * vdev: the virtio_device * Returns the config generation counter * @get_status: read the status byte @@ -48,17 +53,17 @@ struct irq_affinity; * @del_vqs: free virtqueues found by find_vqs(). * @get_features: get the array of feature bits for this device. * vdev: the virtio_device - * Returns the first 32 feature bits (all we currently need). + * Returns the first 64 feature bits (all we currently need). * @finalize_features: confirm what device features we'll be using. * vdev: the virtio_device * This gives the final feature bits for the device: it can change * the dev->feature bits if it wants. * Returns 0 on success or error status - * @bus_name: return the bus name associated with the device + * @bus_name: return the bus name associated with the device (optional) * vdev: the virtio_device * This returns a pointer to the bus name a la pci_name from which * the caller can then copy. - * @set_vq_affinity: set the affinity for a virtqueue. + * @set_vq_affinity: set the affinity for a virtqueue (optional). * @get_vq_affinity: get the affinity for a virtqueue (optional). */ typedef void vq_callback_t(struct virtqueue *); diff --git a/include/linux/xarray.h b/include/linux/xarray.h index f492e21c4aa2..5d9d318bcf7a 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -176,7 +176,8 @@ static inline bool xa_is_internal(const void *entry) */ static inline bool xa_is_err(const void *entry) { - return unlikely(xa_is_internal(entry)); + return unlikely(xa_is_internal(entry) && + entry >= xa_mk_internal(-MAX_ERRNO)); } /** @@ -286,7 +287,6 @@ struct xarray { */ #define DEFINE_XARRAY_ALLOC(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC) -void xa_init_flags(struct xarray *, gfp_t flags); void *xa_load(struct xarray *, unsigned long index); void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *xa_erase(struct xarray *, unsigned long index); @@ -303,6 +303,24 @@ unsigned int xa_extract(struct xarray *, void **dst, unsigned long start, unsigned long max, unsigned int n, xa_mark_t); void xa_destroy(struct xarray *); +/** + * xa_init_flags() - Initialise an empty XArray with flags. + * @xa: XArray. + * @flags: XA_FLAG values. + * + * If you need to initialise an XArray with special flags (eg you need + * to take the lock from interrupt context), use this function instead + * of xa_init(). + * + * Context: Any context. + */ +static inline void xa_init_flags(struct xarray *xa, gfp_t flags) +{ + spin_lock_init(&xa->xa_lock); + xa->xa_flags = flags; + xa->xa_head = NULL; +} + /** * xa_init() - Initialise an empty XArray. * @xa: XArray. @@ -342,20 +360,45 @@ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark) } /** - * xa_for_each() - Iterate over a portion of an XArray. + * xa_for_each_start() - Iterate over a portion of an XArray. * @xa: XArray. - * @entry: Entry retrieved from array. * @index: Index of @entry. - * @max: Maximum index to retrieve from array. - * @filter: Selection criterion. + * @entry: Entry retrieved from array. + * @start: First index to retrieve from array. * - * Initialise @index to the lowest index you want to retrieve from the - * array. During the iteration, @entry will have the value of the entry - * stored in @xa at @index. The iteration will skip all entries in the - * array which do not match @filter. You may modify @index during the - * iteration if you want to skip or reprocess indices. It is safe to modify - * the array during the iteration. At the end of the iteration, @entry will - * be set to NULL and @index will have a value less than or equal to max. + * During the iteration, @entry will have the value of the entry stored + * in @xa at @index. You may modify @index during the iteration if you + * want to skip or reprocess indices. It is safe to modify the array + * during the iteration. At the end of the iteration, @entry will be set + * to NULL and @index will have a value less than or equal to max. + * + * xa_for_each_start() is O(n.log(n)) while xas_for_each() is O(n). You have + * to handle your own locking with xas_for_each(), and if you have to unlock + * after each iteration, it will also end up being O(n.log(n)). + * xa_for_each_start() will spin if it hits a retry entry; if you intend to + * see retry entries, you should use the xas_for_each() iterator instead. + * The xas_for_each() iterator will expand into more inline code than + * xa_for_each_start(). + * + * Context: Any context. Takes and releases the RCU lock. + */ +#define xa_for_each_start(xa, index, entry, start) \ + for (index = start, \ + entry = xa_find(xa, &index, ULONG_MAX, XA_PRESENT); \ + entry; \ + entry = xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT)) + +/** + * xa_for_each() - Iterate over present entries in an XArray. + * @xa: XArray. + * @index: Index of @entry. + * @entry: Entry retrieved from array. + * + * During the iteration, @entry will have the value of the entry stored + * in @xa at @index. You may modify @index during the iteration if you want + * to skip or reprocess indices. It is safe to modify the array during the + * iteration. At the end of the iteration, @entry will be set to NULL and + * @index will have a value less than or equal to max. * * xa_for_each() is O(n.log(n)) while xas_for_each() is O(n). You have * to handle your own locking with xas_for_each(), and if you have to unlock @@ -366,9 +409,36 @@ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark) * * Context: Any context. Takes and releases the RCU lock. */ -#define xa_for_each(xa, entry, index, max, filter) \ - for (entry = xa_find(xa, &index, max, filter); entry; \ - entry = xa_find_after(xa, &index, max, filter)) +#define xa_for_each(xa, index, entry) \ + xa_for_each_start(xa, index, entry, 0) + +/** + * xa_for_each_marked() - Iterate over marked entries in an XArray. + * @xa: XArray. + * @index: Index of @entry. + * @entry: Entry retrieved from array. + * @filter: Selection criterion. + * + * During the iteration, @entry will have the value of the entry stored + * in @xa at @index. The iteration will skip all entries in the array + * which do not match @filter. You may modify @index during the iteration + * if you want to skip or reprocess indices. It is safe to modify the array + * during the iteration. At the end of the iteration, @entry will be set to + * NULL and @index will have a value less than or equal to max. + * + * xa_for_each_marked() is O(n.log(n)) while xas_for_each_marked() is O(n). + * You have to handle your own locking with xas_for_each(), and if you have + * to unlock after each iteration, it will also end up being O(n.log(n)). + * xa_for_each_marked() will spin if it hits a retry entry; if you intend to + * see retry entries, you should use the xas_for_each_marked() iterator + * instead. The xas_for_each_marked() iterator will expand into more inline + * code than xa_for_each_marked(). + * + * Context: Any context. Takes and releases the RCU lock. + */ +#define xa_for_each_marked(xa, index, entry, filter) \ + for (index = 0, entry = xa_find(xa, &index, ULONG_MAX, filter); \ + entry; entry = xa_find_after(xa, &index, ULONG_MAX, filter)) #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock) @@ -393,39 +463,12 @@ void *__xa_erase(struct xarray *, unsigned long index); void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old, void *entry, gfp_t); +int __xa_insert(struct xarray *, unsigned long index, void *entry, gfp_t); int __xa_alloc(struct xarray *, u32 *id, u32 max, void *entry, gfp_t); int __xa_reserve(struct xarray *, unsigned long index, gfp_t); void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); -/** - * __xa_insert() - Store this entry in the XArray unless another entry is - * already present. - * @xa: XArray. - * @index: Index into array. - * @entry: New entry. - * @gfp: Memory allocation flags. - * - * If you would rather see the existing entry in the array, use __xa_cmpxchg(). - * This function is for users who don't care what the entry is, only that - * one is present. - * - * Context: Any context. Expects xa_lock to be held on entry. May - * release and reacquire xa_lock if the @gfp flags permit. - * Return: 0 if the store succeeded. -EEXIST if another entry was present. - * -ENOMEM if memory could not be allocated. - */ -static inline int __xa_insert(struct xarray *xa, unsigned long index, - void *entry, gfp_t gfp) -{ - void *curr = __xa_cmpxchg(xa, index, NULL, entry, gfp); - if (!curr) - return 0; - if (xa_is_err(curr)) - return xa_err(curr); - return -EEXIST; -} - /** * xa_store_bh() - Store this entry in the XArray. * @xa: XArray. @@ -453,7 +496,7 @@ static inline void *xa_store_bh(struct xarray *xa, unsigned long index, } /** - * xa_store_irq() - Erase this entry from the XArray. + * xa_store_irq() - Store this entry in the XArray. * @xa: XArray. * @index: Index into array. * @entry: New entry. @@ -615,24 +658,83 @@ static inline void *xa_cmpxchg_irq(struct xarray *xa, unsigned long index, * @entry: New entry. * @gfp: Memory allocation flags. * - * If you would rather see the existing entry in the array, use xa_cmpxchg(). - * This function is for users who don't care what the entry is, only that - * one is present. + * Inserting a NULL entry will store a reserved entry (like xa_reserve()) + * if no entry is present. Inserting will fail if a reserved entry is + * present, even though loading from this index will return NULL. * - * Context: Process context. Takes and releases the xa_lock. - * May sleep if the @gfp flags permit. + * Context: Any context. Takes and releases the xa_lock. May sleep if + * the @gfp flags permit. * Return: 0 if the store succeeded. -EEXIST if another entry was present. * -ENOMEM if memory could not be allocated. */ static inline int xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { - void *curr = xa_cmpxchg(xa, index, NULL, entry, gfp); - if (!curr) - return 0; - if (xa_is_err(curr)) - return xa_err(curr); - return -EEXIST; + int err; + + xa_lock(xa); + err = __xa_insert(xa, index, entry, gfp); + xa_unlock(xa); + + return err; +} + +/** + * xa_insert_bh() - Store this entry in the XArray unless another entry is + * already present. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * Inserting a NULL entry will store a reserved entry (like xa_reserve()) + * if no entry is present. Inserting will fail if a reserved entry is + * present, even though loading from this index will return NULL. + * + * Context: Any context. Takes and releases the xa_lock while + * disabling softirqs. May sleep if the @gfp flags permit. + * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * -ENOMEM if memory could not be allocated. + */ +static inline int xa_insert_bh(struct xarray *xa, unsigned long index, + void *entry, gfp_t gfp) +{ + int err; + + xa_lock_bh(xa); + err = __xa_insert(xa, index, entry, gfp); + xa_unlock_bh(xa); + + return err; +} + +/** + * xa_insert_irq() - Store this entry in the XArray unless another entry is + * already present. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * Inserting a NULL entry will store a reserved entry (like xa_reserve()) + * if no entry is present. Inserting will fail if a reserved entry is + * present, even though loading from this index will return NULL. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. May sleep if the @gfp flags permit. + * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * -ENOMEM if memory could not be allocated. + */ +static inline int xa_insert_irq(struct xarray *xa, unsigned long index, + void *entry, gfp_t gfp) +{ + int err; + + xa_lock_irq(xa); + err = __xa_insert(xa, index, entry, gfp); + xa_unlock_irq(xa); + + return err; } /** @@ -970,8 +1072,8 @@ static inline bool xa_is_sibling(const void *entry) (entry < xa_mk_sibling(XA_CHUNK_SIZE - 1)); } -#define XA_ZERO_ENTRY xa_mk_internal(256) -#define XA_RETRY_ENTRY xa_mk_internal(257) +#define XA_RETRY_ENTRY xa_mk_internal(256) +#define XA_ZERO_ENTRY xa_mk_internal(257) /** * xa_is_zero() - Is the entry a zero entry? @@ -995,6 +1097,17 @@ static inline bool xa_is_retry(const void *entry) return unlikely(entry == XA_RETRY_ENTRY); } +/** + * xa_is_advanced() - Is the entry only permitted for the advanced API? + * @entry: Entry to be stored in the XArray. + * + * Return: %true if the entry cannot be stored by the normal API. + */ +static inline bool xa_is_advanced(const void *entry) +{ + return xa_is_internal(entry) && (entry <= XA_RETRY_ENTRY); +} + /** * typedef xa_update_node_t - A callback function from the XArray. * @node: The node which is being processed diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 1656c5978498..20d523ee2fec 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -49,6 +49,7 @@ struct prefix_info { struct in6_addr prefix; }; +#include #include #include #include @@ -201,6 +202,15 @@ u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, /* * multicast prototypes (mcast.c) */ +static inline int ipv6_mc_may_pull(struct sk_buff *skb, + unsigned int len) +{ + if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len) + return -EINVAL; + + return pskb_may_pull(skb, len); +} + int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, @@ -219,7 +229,8 @@ void ipv6_mc_unmap(struct inet6_dev *idev); void ipv6_mc_remap(struct inet6_dev *idev); void ipv6_mc_init_dev(struct inet6_dev *idev); void ipv6_mc_destroy_dev(struct inet6_dev *idev); -int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed); +int ipv6_mc_check_icmpv6(struct sk_buff *skb); +int ipv6_mc_check_mld(struct sk_buff *skb); void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp); bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, @@ -489,6 +500,20 @@ static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr) #endif } +static inline bool ipv6_addr_is_all_snoopers(const struct in6_addr *addr) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + __be64 *p = (__be64 *)addr; + + return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | + (p[1] ^ cpu_to_be64(0x6a))) == 0UL; +#else + return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | + addr->s6_addr32[1] | addr->s6_addr32[2] | + (addr->s6_addr32[3] ^ htonl(0x0000006a))) == 0; +#endif +} + #ifdef CONFIG_PROC_FS int if6_proc_init(void); void if6_proc_exit(void); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 1adefe42c0a6..2bfb87eb98ce 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -20,18 +20,6 @@ struct sock; struct socket; struct rxrpc_call; -/* - * Call completion condition (state == RXRPC_CALL_COMPLETE). - */ -enum rxrpc_call_completion { - RXRPC_CALL_SUCCEEDED, /* - Normal termination */ - RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ - RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ - RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ - RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ - NR__RXRPC_CALL_COMPLETIONS -}; - /* * Debug ID counter for tracing. */ @@ -73,10 +61,6 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t, unsigned int); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); -int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, - struct sockaddr_rxrpc *, struct key *); -int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, - enum rxrpc_call_completion *, u32 *); u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); diff --git a/include/net/ax25.h b/include/net/ax25.h index 3f9aea8087e3..8b7eb46ad72d 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -201,6 +201,18 @@ static inline void ax25_hold_route(ax25_route *ax25_rt) void __ax25_put_route(ax25_route *ax25_rt); +extern rwlock_t ax25_route_lock; + +static inline void ax25_route_lock_use(void) +{ + read_lock(&ax25_route_lock); +} + +static inline void ax25_route_lock_unuse(void) +{ + read_unlock(&ax25_route_lock); +} + static inline void ax25_put_route(ax25_route *ax25_rt) { if (refcount_dec_and_test(&ax25_rt->refcount)) diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index fc3111515f5c..c781e1afd683 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -180,6 +180,19 @@ struct port; #pragma pack(8) #endif +struct bond_3ad_stats { + atomic64_t lacpdu_rx; + atomic64_t lacpdu_tx; + atomic64_t lacpdu_unknown_rx; + atomic64_t lacpdu_illegal_rx; + + atomic64_t marker_rx; + atomic64_t marker_tx; + atomic64_t marker_resp_rx; + atomic64_t marker_resp_tx; + atomic64_t marker_unknown_rx; +}; + /* aggregator structure(43.4.5 in the 802.3ad standard) */ typedef struct aggregator { struct mac_addr aggregator_mac_address; @@ -265,6 +278,7 @@ struct ad_system { struct ad_bond_info { struct ad_system system; /* 802.3ad system structure */ + struct bond_3ad_stats stats; u32 agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ u16 aggregator_identifier; }; @@ -272,6 +286,7 @@ struct ad_bond_info { struct ad_slave_info { struct aggregator aggregator; /* 802.3ad aggregator structure */ struct port port; /* 802.3ad port structure */ + struct bond_3ad_stats stats; u16 id; }; @@ -307,5 +322,7 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, int bond_3ad_set_carrier(struct bonding *bond); void bond_3ad_update_lacp_rate(struct bonding *bond); void bond_3ad_update_ad_actor_settings(struct bonding *bond); +int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats); +size_t bond_3ad_stats_size(void); #endif /* _NET_BOND_3AD_H */ diff --git a/include/net/devlink.h b/include/net/devlink.h index a81a1b7a67d7..85c9eabaf056 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -30,7 +30,6 @@ struct devlink { struct list_head param_list; struct list_head region_list; u32 snapshot_id; - struct list_head reporter_list; struct devlink_dpipe_headers *dpipe_headers; const struct devlink_ops *ops; struct device *dev; @@ -49,6 +48,7 @@ struct devlink_port_attrs { struct devlink_port { struct list_head list; + struct list_head param_list; struct devlink *devlink; unsigned index; bool registered; @@ -367,12 +367,17 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, + DEVLINK_PARAM_GENERIC_ID_WOL, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, DEVLINK_PARAM_GENERIC_ID_MAX = __DEVLINK_PARAM_GENERIC_ID_MAX - 1, }; +enum devlink_param_wol_types { + DEVLINK_PARAM_WAKE_MAGIC = (1 << 0), +}; + #define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME "internal_error_reset" #define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL @@ -397,6 +402,9 @@ enum devlink_param_generic_id { #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME "fw_load_policy" #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE DEVLINK_PARAM_TYPE_U8 +#define DEVLINK_PARAM_GENERIC_WOL_NAME "wake_on_lan" +#define DEVLINK_PARAM_GENERIC_WOL_TYPE DEVLINK_PARAM_TYPE_U8 + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ @@ -424,36 +432,6 @@ struct devlink_region; typedef void devlink_snapshot_data_dest_t(const void *data); -struct devlink_health_buffer; -struct devlink_health_reporter; - -/** - * struct devlink_health_reporter_ops - Reporter operations - * @name: reporter name - * dump_size: dump buffer size allocated by the devlink - * diagnose_size: diagnose buffer size allocated by the devlink - * recover: callback to recover from reported error - * if priv_ctx is NULL, run a full recover - * dump: callback to dump an object - * if priv_ctx is NULL, run a full dump - * diagnose: callback to diagnose the current status - */ - -struct devlink_health_reporter_ops { - char *name; - unsigned int dump_size; - unsigned int diagnose_size; - int (*recover)(struct devlink_health_reporter *reporter, - void *priv_ctx); - int (*dump)(struct devlink_health_reporter *reporter, - struct devlink_health_buffer **buffers_array, - unsigned int buffer_size, unsigned int num_buffers, - void *priv_ctx); - int (*diagnose)(struct devlink_health_reporter *reporter, - struct devlink_health_buffer **buffers_array, - unsigned int buffer_size, unsigned int num_buffers); -}; - struct devlink_ops { int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, @@ -598,11 +576,26 @@ int devlink_params_register(struct devlink *devlink, void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count); +int devlink_port_params_register(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count); +void devlink_port_params_unregister(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count); int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *init_val); int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val); +int +devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, + u32 param_id, + union devlink_param_value *init_val); +int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port, + u32 param_id, + union devlink_param_value init_val); void devlink_param_value_changed(struct devlink *devlink, u32 param_id); +void devlink_port_param_value_changed(struct devlink_port *devlink_port, + u32 param_id); void devlink_param_value_str_fill(union devlink_param_value *dst_val, const char *src); struct devlink_region *devlink_region_create(struct devlink *devlink, @@ -615,34 +608,6 @@ int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, u8 *data, u32 snapshot_id, devlink_snapshot_data_dest_t *data_destructor); -int devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer, - int attrtype); -void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer); -void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer); -int devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer, - char *name); -int devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer, - u8 value); -int devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer, - u32 value); -int devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer, - u64 value); -int devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer, - char *name); -int devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer, - void *data, int len); -struct devlink_health_reporter * -devlink_health_reporter_create(struct devlink *devlink, - const struct devlink_health_reporter_ops *ops, - u64 graceful_period, bool auto_recover, - void *priv); -void -devlink_health_reporter_destroy(struct devlink_health_reporter *reporter); - -void * -devlink_health_reporter_priv(struct devlink_health_reporter *reporter); -int devlink_health_report(struct devlink_health_reporter *reporter, - const char *msg, void *priv_ctx); #else static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, @@ -850,6 +815,21 @@ devlink_params_unregister(struct devlink *devlink, } +static inline int +devlink_port_params_register(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count) +{ + return 0; +} + +static inline void +devlink_port_params_unregister(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count) +{ +} + static inline int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *init_val) @@ -864,11 +844,33 @@ devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, return -EOPNOTSUPP; } +static inline int +devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, + u32 param_id, + union devlink_param_value *init_val) +{ + return -EOPNOTSUPP; +} + +static inline int +devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port, + u32 param_id, + union devlink_param_value init_val) +{ + return -EOPNOTSUPP; +} + static inline void devlink_param_value_changed(struct devlink *devlink, u32 param_id) { } +static inline void +devlink_port_param_value_changed(struct devlink_port *devlink_port, + u32 param_id) +{ +} + static inline void devlink_param_value_str_fill(union devlink_param_value *dst_val, const char *src) @@ -903,91 +905,6 @@ devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, return 0; } -static inline int -devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer, - int attrtype) -{ - return 0; -} - -static inline void -devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer) -{ -} - -static inline void -devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer) -{ -} - -static inline int -devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer, - char *name) -{ - return 0; -} - -static inline int -devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer, - u8 value) -{ - return 0; -} - -static inline int -devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer, - u32 value) -{ - return 0; -} - -static inline int -devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer, - u64 value) -{ - return 0; -} - -static inline int -devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer, - char *name) -{ - return 0; -} - -static inline int -devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer, - void *data, int len) -{ - return 0; -} - -static inline struct devlink_health_reporter * -devlink_health_reporter_create(struct devlink *devlink, - const struct devlink_health_reporter_ops *ops, - u64 graceful_period, bool auto_recover, - void *priv) -{ - return NULL; -} - -static inline void -devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) -{ -} - -static inline void * -devlink_health_reporter_priv(struct devlink_health_reporter *reporter) -{ - return NULL; -} - -static inline int -devlink_health_report(struct devlink_health_reporter *reporter, - const char *msg, void *priv_ctx) -{ - return 0; -} #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 371b3b45fd5c..ff40e1d08157 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -139,8 +139,8 @@ struct inet_connection_sock { } icsk_mtup; u32 icsk_user_timeout; - u64 icsk_ca_priv[88 / sizeof(u64)]; -#define ICSK_CA_PRIV_SIZE (11 * sizeof(u64)) + u64 icsk_ca_priv[104 / sizeof(u64)]; +#define ICSK_CA_PRIV_SIZE (13 * sizeof(u64)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ @@ -314,4 +314,29 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); + +#define TCP_PINGPONG_THRESH 3 + +static inline void inet_csk_enter_pingpong_mode(struct sock *sk) +{ + inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH; +} + +static inline void inet_csk_exit_pingpong_mode(struct sock *sk) +{ + inet_csk(sk)->icsk_ack.pingpong = 0; +} + +static inline bool inet_csk_in_pingpong_mode(struct sock *sk) +{ + return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; +} + +static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ack.pingpong < U8_MAX) + icsk->icsk_ack.pingpong++; +} #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 1662cbc0b46b..b02bf737d019 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -77,8 +77,8 @@ struct inet_frag_queue { struct timer_list timer; spinlock_t lock; refcount_t refcnt; - struct sk_buff *fragments; /* Used in IPv6. */ - struct rb_root rb_fragments; /* Used in IPv4. */ + struct sk_buff *fragments; /* used in 6lopwpan IPv6. */ + struct rb_root rb_fragments; /* Used in IPv4/IPv6. */ struct sk_buff *fragments_tail; struct sk_buff *last_run_head; ktime_t stamp; @@ -153,4 +153,16 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, long val) extern const u8 ip_frag_ecn_table[16]; +/* Return values of inet_frag_queue_insert() */ +#define IPFRAG_OK 0 +#define IPFRAG_DUP 1 +#define IPFRAG_OVERLAP 2 +int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, + int offset, int end); +void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + struct sk_buff *parent); +void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, + void *reasm_data); +struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q); + #endif diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c5969762a8f4..9c8214d2116d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -241,7 +241,7 @@ int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb, struct fib_dump_filter *filter); -int fib_table_flush(struct net *net, struct fib_table *table); +int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 34f019650941..f069f64ebf29 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -267,7 +267,7 @@ void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, - const u8 proto); + const u8 proto, int tunnel_hlen); int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a0d2e0bb9a94..047f9a5ccaad 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -453,9 +453,6 @@ struct ip_vs_protocol { int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph); - int (*csum_check)(int af, struct sk_buff *skb, - struct ip_vs_protocol *pp); - const char *(*state_name)(int state); void (*state_transition)(struct ip_vs_conn *cp, int direction, diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 6ced1e6899b6..28aa9b30aece 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -82,8 +82,15 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); /* Don't send error if the first segment did not arrive. */ - head = fq->q.fragments; - if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) + if (!(fq->q.flags & INET_FRAG_FIRST_IN)) + goto out; + + /* sk_buff::dev and sk_buff::rbnode are unionized. So we + * pull the head out of the tree in order to be able to + * deal with head->dev. + */ + head = inet_frag_pull_head(&fq->q); + if (!head) goto out; head->dev = dev; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 99d4148e0f90..a68ced28d8f4 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -160,6 +161,9 @@ struct net { #endif #if IS_ENABLED(CONFIG_CAN) struct netns_can can; +#endif +#ifdef CONFIG_XDP_SOCKETS + struct netns_xdp xdp; #endif struct sock *diag_nlsk; atomic_t fnhe_genid; diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 4cd56808ac4e..89808ce293c4 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -43,7 +43,6 @@ static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) } struct net_device *setup_pre_routing(struct sk_buff *skb); -void br_netfilter_enable(void); #if IS_ENABLED(CONFIG_IPV6) int br_validate_ipv6(struct net *net, struct sk_buff *skb); diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 135ee702c7b0..2c8c2b023848 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -22,5 +22,8 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp; #ifdef CONFIG_NF_CT_PROTO_UDPLITE extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite; #endif +#ifdef CONFIG_NF_CT_PROTO_GRE +extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre; +#endif #endif /*_NF_CONNTRACK_IPV4_H*/ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 249d0a5b12b8..b5aac5ae5129 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -187,8 +187,6 @@ bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report); bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, struct net *net, struct nf_conntrack_tuple *tuple); -bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig); void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index afc9b3620473..ae41e92251dd 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -26,7 +26,7 @@ int nf_conntrack_init_net(struct net *net); void nf_conntrack_cleanup_net(struct net *net); void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list); -int nf_conntrack_proto_pernet_init(struct net *net); +void nf_conntrack_proto_pernet_init(struct net *net); void nf_conntrack_proto_pernet_fini(struct net *net); int nf_conntrack_proto_init(void); @@ -39,8 +39,7 @@ void nf_conntrack_init_end(void); void nf_conntrack_cleanup_end(void); bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_l4proto *l4proto); + const struct nf_conntrack_tuple *orig); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index ae7b86f587f2..778087591983 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -27,27 +27,6 @@ struct nf_conntrack_l4proto { /* protoinfo nlattr size, closes a hole */ u16 nlattr_size; - /* Try to fill in the third arg: dataoff is offset past network protocol - hdr. Return true if possible. */ - bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff, - struct net *net, struct nf_conntrack_tuple *tuple); - - /* Invert the per-proto part of the tuple: ie. turn xmit into reply. - * Only used by icmp, most protocols use a generic version. - */ - bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig); - - /* Returns verdict for packet, or -1 for invalid. */ - int (*packet)(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state); - - /* Called when a conntrack entry is destroyed */ - void (*destroy)(struct nf_conn *ct); - /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); @@ -79,17 +58,23 @@ struct nf_conntrack_l4proto { /* Print out the private part of the conntrack. */ void (*print_conntrack)(struct seq_file *s, struct nf_conn *); #endif - unsigned int *net_id; - /* Init l4proto pernet data */ - int (*init_net)(struct net *net); - - /* Return the per-net protocol part. */ - struct nf_proto_net *(*get_net_proto)(struct net *net); - - /* Module (if any) which this is connected to. */ - struct module *me; }; +bool icmp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct net *net, + struct nf_conntrack_tuple *tuple); + +bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct net *net, + struct nf_conntrack_tuple *tuple); + +bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig); +bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig); + int nf_conntrack_icmpv4_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, @@ -99,31 +84,63 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state); + +int nf_conntrack_icmp_packet(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); + +int nf_conntrack_icmpv6_packet(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); + +int nf_conntrack_udp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_udplite_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_tcp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_dccp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_sctp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); +int nf_conntrack_gre_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state); + +void nf_conntrack_generic_init_net(struct net *net); +void nf_conntrack_tcp_init_net(struct net *net); +void nf_conntrack_udp_init_net(struct net *net); +void nf_conntrack_gre_init_net(struct net *net); +void nf_conntrack_dccp_init_net(struct net *net); +void nf_conntrack_sctp_init_net(struct net *net); +void nf_conntrack_icmp_init_net(struct net *net); +void nf_conntrack_icmpv6_init_net(struct net *net); + /* Existing built-in generic protocol */ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; #define MAX_NF_CT_PROTO IPPROTO_UDPLITE -const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto); - -const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto); -void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); - -/* Protocol pernet registration. */ -int nf_ct_l4proto_pernet_register_one(struct net *net, - const struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_pernet_unregister_one(struct net *net, - const struct nf_conntrack_l4proto *proto); -int nf_ct_l4proto_pernet_register(struct net *net, - const struct nf_conntrack_l4proto *const proto[], - unsigned int num_proto); -void nf_ct_l4proto_pernet_unregister(struct net *net, - const struct nf_conntrack_l4proto *const proto[], - unsigned int num_proto); - -/* Protocol global registration. */ -int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); +const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto); /* Generic netlink helpers */ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, @@ -192,4 +209,11 @@ static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net) } #endif +#ifdef CONFIG_NF_CT_PROTO_GRE +static inline struct nf_gre_net *nf_gre_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.gre; +} +#endif + #endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 7d5cda7ce32a..3e370cb36263 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -84,7 +84,6 @@ struct flow_offload { struct nf_flow_route { struct { struct dst_entry *dst; - int ifindex; } tuple[FLOW_OFFLOAD_DIR_MAX]; }; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index a17eb2f8d40e..8aff77cafb8b 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -47,10 +47,6 @@ extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct); -/* Is this tuple already taken? (not by us)*/ -int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack); - static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) { #if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 841835a387e1..45eba7d7ab38 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1011,22 +1011,33 @@ void nft_unregister_expr(struct nft_expr_type *); int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v); +/** + * struct nft_object_hash_key - key to lookup nft_object + * + * @name: name of the stateful object to look up + * @table: table the object belongs to + */ +struct nft_object_hash_key { + const char *name; + const struct nft_table *table; +}; + /** * struct nft_object - nf_tables stateful object * * @list: table stateful object list node - * @table: table this object belongs to - * @name: name of this stateful object + * @key: keys that identify this object + * @rhlhead: nft_objname_ht node * @genmask: generation mask * @use: number of references to this stateful object * @handle: unique object handle * @ops: object operations - * @data: object data, layout depends on type + * @data: object data, layout depends on type */ struct nft_object { struct list_head list; - char *name; - struct nft_table *table; + struct rhlist_head rhlhead; + struct nft_object_hash_key key; u32 genmask:2, use:30; u64 handle; @@ -1043,11 +1054,12 @@ static inline void *nft_obj_data(const struct nft_object *obj) #define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr)) -struct nft_object *nft_obj_lookup(const struct nft_table *table, +struct nft_object *nft_obj_lookup(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask); -void nft_obj_notify(struct net *net, struct nft_table *table, +void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, int family, int report, gfp_t gfp); diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 2046d104f323..7281895fa6d9 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -80,6 +80,22 @@ struct nft_regs; struct nft_pktinfo; void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_cmp_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_lookup_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_payload_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_immediate_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_bitwise_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_range_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_byteorder_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_dynset_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_rt_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt); #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 51cba0b8adf5..f19b53130bf7 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -18,21 +18,11 @@ struct ctl_table_header; struct nf_conntrack_ecache; -struct nf_proto_net { -#ifdef CONFIG_SYSCTL - struct ctl_table_header *ctl_table_header; - struct ctl_table *ctl_table; -#endif - unsigned int users; -}; - struct nf_generic_net { - struct nf_proto_net pn; unsigned int timeout; }; struct nf_tcp_net { - struct nf_proto_net pn; unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX]; unsigned int tcp_loose; unsigned int tcp_be_liberal; @@ -46,18 +36,15 @@ enum udp_conntrack { }; struct nf_udp_net { - struct nf_proto_net pn; unsigned int timeouts[UDP_CT_MAX]; }; struct nf_icmp_net { - struct nf_proto_net pn; unsigned int timeout; }; #ifdef CONFIG_NF_CT_PROTO_DCCP struct nf_dccp_net { - struct nf_proto_net pn; int dccp_loose; unsigned int dccp_timeout[CT_DCCP_MAX + 1]; }; @@ -65,11 +52,23 @@ struct nf_dccp_net { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net { - struct nf_proto_net pn; unsigned int timeouts[SCTP_CONNTRACK_MAX]; }; #endif +#ifdef CONFIG_NF_CT_PROTO_GRE +enum gre_conntrack { + GRE_CT_UNREPLIED, + GRE_CT_REPLIED, + GRE_CT_MAX +}; + +struct nf_gre_net { + struct list_head keymap_list; + unsigned int timeouts[GRE_CT_MAX]; +}; +#endif + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; @@ -82,6 +81,9 @@ struct nf_ip_net { #ifdef CONFIG_NF_CT_PROTO_SCTP struct nf_sctp_net sctp; #endif +#ifdef CONFIG_NF_CT_PROTO_GRE + struct nf_gre_net gre; +#endif }; struct ct_pcpu { diff --git a/include/net/netns/xdp.h b/include/net/netns/xdp.h new file mode 100644 index 000000000000..e5734261ba0a --- /dev/null +++ b/include/net/netns/xdp.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NETNS_XDP_H__ +#define __NETNS_XDP_H__ + +#include +#include + +struct netns_xdp { + struct mutex lock; + struct hlist_head list; +}; + +#endif /* __NETNS_XDP_H__ */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9481f2c142e2..7a4957599874 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -580,8 +580,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, void qdisc_reset(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); void qdisc_put_unlocked(struct Qdisc *qdisc); -void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, - unsigned int len); +void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len); #ifdef CONFIG_NET_SCHED int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type, void *type_data); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 003020eb6e66..58e4b23cecf4 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -199,6 +199,8 @@ struct sctp_sock { __u32 flowlabel; __u8 dscp; + int pf_retrans; + /* The initial Path MTU to use for new associations. */ __u32 pathmtu; @@ -209,6 +211,8 @@ struct sctp_sock { /* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */ __u32 param_flags; + __u32 default_ss; + struct sctp_rtoinfo rtoinfo; struct sctp_paddrparams paddrparam; struct sctp_assocparams assocparams; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5c950180d61b..a6e0355921e1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1608,6 +1608,7 @@ struct tcp_fastopen_request { struct msghdr *data; /* data in MSG_FASTOPEN */ size_t size; int copied; /* queued in tcp_connect() */ + struct ubuf_info *uarg; }; void tcp_free_fastopen_req(struct tcp_sock *tp); void tcp_fastopen_destroy_cipher(struct sock *sk); diff --git a/include/net/tls.h b/include/net/tls.h index 90bf52db573e..4592606e136a 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -120,6 +120,8 @@ struct tls_rec { struct scatterlist sg_aead_out[2]; char aad_space[TLS_AAD_SPACE_SIZE]; + u8 iv_data[TLS_CIPHER_AES_GCM_128_IV_SIZE + + TLS_CIPHER_AES_GCM_128_SALT_SIZE]; struct aead_request aead_req; u8 aead_req_ctx[]; }; diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 13acb9803a6d..61cf7dbb6782 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -42,6 +42,7 @@ struct xdp_umem { struct work_struct work; struct page **pgs; u32 npgs; + int id; struct net_device *dev; struct xdp_umem_fq_reuse *fq_reuse; u16 queue_id; diff --git a/include/sound/soc.h b/include/sound/soc.h index 8ec1de856ee7..e665f111b0d2 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -985,6 +985,12 @@ struct snd_soc_dai_link { /* Do not create a PCM for this DAI link (Backend link) */ unsigned int ignore:1; + /* + * This driver uses legacy platform naming. Set by the core, machine + * drivers should not modify this value. + */ + unsigned int legacy_platform:1; + struct list_head list; /* DAI link list of the soc card */ struct snd_soc_dobj dobj; /* For topology */ }; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 33d291888ba9..e3f005eae1f7 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -25,6 +25,7 @@ enum afs_call_trace { afs_call_trace_alloc, afs_call_trace_free, + afs_call_trace_get, afs_call_trace_put, afs_call_trace_wake, afs_call_trace_work, @@ -159,6 +160,7 @@ enum afs_file_error { #define afs_call_traces \ EM(afs_call_trace_alloc, "ALLOC") \ EM(afs_call_trace_free, "FREE ") \ + EM(afs_call_trace_get, "GET ") \ EM(afs_call_trace_put, "PUT ") \ EM(afs_call_trace_wake, "WAKE ") \ E_(afs_call_trace_work, "WORK ") diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 7e39d2fc7c75..44acfbca1266 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -46,65 +46,6 @@ TRACE_EVENT(devlink_hwmsg, (int) __entry->len, __get_dynamic_array(buf), __entry->len) ); -TRACE_EVENT(devlink_health_report, - TP_PROTO(const struct devlink *devlink, const char *reporter_name, - const char *msg), - - TP_ARGS(devlink, reporter_name, msg), - - TP_STRUCT__entry( - __string(bus_name, devlink->dev->bus->name) - __string(dev_name, dev_name(devlink->dev)) - __string(driver_name, devlink->dev->driver->name) - __string(reporter_name, msg) - __string(msg, msg) - ), - - TP_fast_assign( - __assign_str(bus_name, devlink->dev->bus->name); - __assign_str(dev_name, dev_name(devlink->dev)); - __assign_str(driver_name, devlink->dev->driver->name); - __assign_str(reporter_name, reporter_name); - __assign_str(msg, msg); - ), - - TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: %s", - __get_str(bus_name), __get_str(dev_name), - __get_str(driver_name), __get_str(reporter_name), - __get_str(msg)) -); - -TRACE_EVENT(devlink_health_recover_aborted, - TP_PROTO(const struct devlink *devlink, const char *reporter_name, - bool health_state, u64 time_since_last_recover), - - TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover), - - TP_STRUCT__entry( - __string(bus_name, devlink->dev->bus->name) - __string(dev_name, dev_name(devlink->dev)) - __string(driver_name, devlink->dev->driver->name) - __string(reporter_name, reporter_name) - __field(bool, health_state) - __field(u64, time_since_last_recover) - ), - - TP_fast_assign( - __assign_str(bus_name, devlink->dev->bus->name); - __assign_str(dev_name, dev_name(devlink->dev)); - __assign_str(driver_name, devlink->dev->driver->name); - __assign_str(reporter_name, reporter_name); - __entry->health_state = health_state; - __entry->time_since_last_recover = time_since_last_recover; - ), - - TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: health_state=%d time_since_last_recover = %llu recover aborted", - __get_str(bus_name), __get_str(dev_name), - __get_str(driver_name), __get_str(reporter_name), - __entry->health_state, - __entry->time_since_last_recover) -); - #endif /* _TRACE_DEVLINK_H */ /* This part must be outside protection */ @@ -123,9 +64,6 @@ static inline void trace_devlink_hwmsg(const struct devlink *devlink, { } -static inline void trace_devlink_health(const char *msg) -{ -} #endif /* _TRACE_DEVLINK_H */ #endif diff --git a/include/trace/events/mlxsw.h b/include/trace/events/mlxsw.h new file mode 100644 index 000000000000..6c2bafcade18 --- /dev/null +++ b/include/trace/events/mlxsw.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlxsw + +#if !defined(_MLXSW_TRACEPOINT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _MLXSW_TRACEPOINT_H + +#include + +struct mlxsw_sp; +struct mlxsw_sp_acl_atcam_region; + +TRACE_EVENT(mlxsw_sp_acl_atcam_entry_add_ctcam_spill, + TP_PROTO(const struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_acl_atcam_region *aregion), + + TP_ARGS(mlxsw_sp, aregion), + + TP_STRUCT__entry( + __field(const void *, mlxsw_sp) + __field(const void *, aregion) + ), + + TP_fast_assign( + __entry->mlxsw_sp = mlxsw_sp; + __entry->aregion = aregion; + ), + + TP_printk("mlxsw_sp %p, aregion %p", + __entry->mlxsw_sp, __entry->aregion) +); + +#endif /* _MLXSW_TRACEPOINT_H */ + +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/android/binder_ctl.h b/include/uapi/linux/android/binderfs.h similarity index 83% rename from include/uapi/linux/android/binder_ctl.h rename to include/uapi/linux/android/binderfs.h index 65b2efd1a0a5..87410477aea9 100644 --- a/include/uapi/linux/android/binder_ctl.h +++ b/include/uapi/linux/android/binderfs.h @@ -4,8 +4,8 @@ * */ -#ifndef _UAPI_LINUX_BINDER_CTL_H -#define _UAPI_LINUX_BINDER_CTL_H +#ifndef _UAPI_LINUX_BINDERFS_H +#define _UAPI_LINUX_BINDERFS_H #include #include @@ -22,8 +22,8 @@ */ struct binderfs_device { char name[BINDERFS_MAX_NAME + 1]; - __u8 major; - __u8 minor; + __u32 major; + __u32 minor; }; /** @@ -31,5 +31,5 @@ struct binderfs_device { */ #define BINDER_CTL_ADD _IOWR('b', 1, struct binderfs_device) -#endif /* _UAPI_LINUX_BINDER_CTL_H */ +#endif /* _UAPI_LINUX_BINDERFS_H */ diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index 6fa38d001d84..498eec813494 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -138,6 +138,7 @@ struct blk_zone_range { * @BLKRESETZONE: Reset the write pointer of the zones in the specified * sector range. The sector range must be zone aligned. * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors. + * @BLKGETNRZONES: Get the total number of zones of the device. */ #define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report) #define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 91c43884f295..60b99b730a41 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -14,6 +14,7 @@ /* Extended instruction set based on top of classic BPF */ /* instruction classes */ +#define BPF_JMP32 0x06 /* jmp mode in word width */ #define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ @@ -2540,6 +2541,7 @@ struct __sk_buff { __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); __u64 tstamp; __u32 wire_len; + __u32 gso_segs; }; struct bpf_tunnel_key { diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 6b26bb2ce4dc..61b4447a6c5b 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -89,12 +89,10 @@ enum devlink_command { DEVLINK_CMD_REGION_DEL, DEVLINK_CMD_REGION_READ, - DEVLINK_CMD_HEALTH_REPORTER_GET, - DEVLINK_CMD_HEALTH_REPORTER_SET, - DEVLINK_CMD_HEALTH_REPORTER_RECOVER, - DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, - DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, - DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, + DEVLINK_CMD_PORT_PARAM_GET, /* can dump */ + DEVLINK_CMD_PORT_PARAM_SET, + DEVLINK_CMD_PORT_PARAM_NEW, + DEVLINK_CMD_PORT_PARAM_DEL, /* add new commands above here */ __DEVLINK_CMD_MAX, @@ -292,24 +290,6 @@ enum devlink_attr { DEVLINK_ATTR_REGION_CHUNK_ADDR, /* u64 */ DEVLINK_ATTR_REGION_CHUNK_LEN, /* u64 */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT, /* nested */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR, /* nested */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME, /* string */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE, /* nested */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY, /* nested */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, /* u8 */ - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, /* dynamic */ - - DEVLINK_ATTR_HEALTH_REPORTER, /* nested */ - DEVLINK_ATTR_HEALTH_REPORTER_NAME, /* string */ - DEVLINK_ATTR_HEALTH_REPORTER_STATE, /* u8 */ - DEVLINK_ATTR_HEALTH_REPORTER_ERR, /* u64 */ - DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, /* u64 */ - DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL, /* u8 */ - DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, /* u64 */ - DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, /* u64 */ - DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, /* u8 */ - /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h index caf8dc019250..325395f56bfa 100644 --- a/include/uapi/linux/icmpv6.h +++ b/include/uapi/linux/icmpv6.h @@ -108,6 +108,8 @@ struct icmp6hdr { #define ICMPV6_MOBILE_PREFIX_SOL 146 #define ICMPV6_MOBILE_PREFIX_ADV 147 +#define ICMPV6_MRDISC_ADV 151 + /* * Codes for Destination Unreachable */ diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index 61a1bf6e865e..790585f0e61b 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -117,6 +117,30 @@ struct ad_info { __u8 partner_system[ETH_ALEN]; }; +/* Embedded inside LINK_XSTATS_TYPE_BOND */ +enum { + BOND_XSTATS_UNSPEC, + BOND_XSTATS_3AD, + __BOND_XSTATS_MAX +}; +#define BOND_XSTATS_MAX (__BOND_XSTATS_MAX - 1) + +/* Embedded inside BOND_XSTATS_3AD */ +enum { + BOND_3AD_STAT_LACPDU_RX, + BOND_3AD_STAT_LACPDU_TX, + BOND_3AD_STAT_LACPDU_UNKNOWN_RX, + BOND_3AD_STAT_LACPDU_ILLEGAL_RX, + BOND_3AD_STAT_MARKER_RX, + BOND_3AD_STAT_MARKER_TX, + BOND_3AD_STAT_MARKER_RESP_RX, + BOND_3AD_STAT_MARKER_RESP_TX, + BOND_3AD_STAT_MARKER_UNKNOWN_RX, + BOND_3AD_STAT_PAD, + __BOND_3AD_STAT_MAX +}; +#define BOND_3AD_STAT_MAX (__BOND_3AD_STAT_MAX - 1) + #endif /* _LINUX_IF_BONDING_H */ /* diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d6533828123a..5b225ff63b48 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -925,6 +925,7 @@ enum { enum { LINK_XSTATS_TYPE_UNSPEC, LINK_XSTATS_TYPE_BRIDGE, + LINK_XSTATS_TYPE_BOND, __LINK_XSTATS_TYPE_MAX }; #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) diff --git a/include/uapi/linux/igmp.h b/include/uapi/linux/igmp.h index 7e44ac02ca18..90c28bc466c6 100644 --- a/include/uapi/linux/igmp.h +++ b/include/uapi/linux/igmp.h @@ -93,6 +93,7 @@ struct igmpv3_query { #define IGMP_MTRACE_RESP 0x1e #define IGMP_MTRACE 0x1f +#define IGMP_MRDISC_ADV 0x30 /* From RFC4286 */ /* * Use the BSD names for these for compatibility diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index f6052e70bf40..e7ad9d350a28 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -268,7 +268,7 @@ struct sockaddr_in { #define IN_MULTICAST(a) IN_CLASSD(a) #define IN_MULTICAST_NET 0xe0000000 -#define IN_BADCLASS(a) ((((long int) (a) ) == 0xffffffff) +#define IN_BADCLASS(a) (((long int) (a) ) == (long int)0xffffffff) #define IN_EXPERIMENTAL(a) IN_BADCLASS((a)) #define IN_CLASSE(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000) @@ -292,10 +292,11 @@ struct sockaddr_in { #define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000) /* Defines for Multicast INADDR */ -#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */ -#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ -#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ -#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */ +#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */ +#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ +#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ +#define INADDR_ALLSNOOPERS_GROUP 0xe000006aU /* 224.0.0.106 */ +#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */ #endif /* contains the htonl type stuff.. */ diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index fb78f6f500f3..f056b2a00d5c 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -26,13 +26,17 @@ */ struct input_event { -#if (__BITS_PER_LONG != 32 || !defined(__USE_TIME_BITS64)) && !defined(__KERNEL) +#if (__BITS_PER_LONG != 32 || !defined(__USE_TIME_BITS64)) && !defined(__KERNEL__) struct timeval time; #define input_event_sec time.tv_sec #define input_event_usec time.tv_usec #else __kernel_ulong_t __sec; +#if defined(__sparc__) && defined(__arch64__) + unsigned int __usec; +#else __kernel_ulong_t __usec; +#endif #define input_event_sec __sec #define input_event_usec __usec #endif diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 7de4f1bdaf06..030302893d96 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -219,6 +219,7 @@ enum nft_chain_attributes { * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN) * @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32) + * @NFTA_RULE_POSITION_ID: transaction unique identifier of the previous rule (NLA_U32) */ enum nft_rule_attributes { NFTA_RULE_UNSPEC, @@ -231,6 +232,7 @@ enum nft_rule_attributes { NFTA_RULE_USERDATA, NFTA_RULE_PAD, NFTA_RULE_ID, + NFTA_RULE_POSITION_ID, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) @@ -789,6 +791,8 @@ enum nft_exthdr_attributes { * @NFT_META_CGROUP: socket control group (skb->sk->sk_classid) * @NFT_META_PRANDOM: a 32bit pseudo-random number * @NFT_META_SECPATH: boolean, secpath_exists (!!skb->sp) + * @NFT_META_IIFKIND: packet input interface kind name (dev->rtnl_link_ops->kind) + * @NFT_META_OIFKIND: packet output interface kind name (dev->rtnl_link_ops->kind) */ enum nft_meta_keys { NFT_META_LEN, @@ -817,6 +821,8 @@ enum nft_meta_keys { NFT_META_CGROUP, NFT_META_PRANDOM, NFT_META_SECPATH, + NFT_META_IIFKIND, + NFT_META_OIFKIND, }; /** @@ -871,8 +877,8 @@ enum nft_hash_attributes { NFTA_HASH_SEED, NFTA_HASH_OFFSET, NFTA_HASH_TYPE, - NFTA_HASH_SET_NAME, - NFTA_HASH_SET_ID, + NFTA_HASH_SET_NAME, /* deprecated */ + NFTA_HASH_SET_ID, /* deprecated */ __NFTA_HASH_MAX, }; #define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 32a3416b51c3..02ac251be8c4 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -333,12 +333,19 @@ enum { /* Basic filter */ +struct tc_basic_pcnt { + __u64 rcnt; + __u64 rhit; +}; + enum { TCA_BASIC_UNSPEC, TCA_BASIC_CLASSID, TCA_BASIC_EMATCHES, TCA_BASIC_ACT, TCA_BASIC_POLICE, + TCA_BASIC_PCNT, + TCA_BASIC_PAD, __TCA_BASIC_MAX }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index d584073532b8..b8f2c4d56532 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -59,6 +59,10 @@ typedef __s32 sctp_assoc_t; +#define SCTP_FUTURE_ASSOC 0 +#define SCTP_CURRENT_ASSOC 1 +#define SCTP_ALL_ASSOC 2 + /* The following symbols come from the Sockets API Extensions for * SCTP . */ diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h new file mode 100644 index 000000000000..78b2591a7782 --- /dev/null +++ b/include/uapi/linux/xdp_diag.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * xdp_diag: interface for query/monitor XDP sockets + * Copyright(c) 2019 Intel Corporation. + */ + +#ifndef _LINUX_XDP_DIAG_H +#define _LINUX_XDP_DIAG_H + +#include + +struct xdp_diag_req { + __u8 sdiag_family; + __u8 sdiag_protocol; + __u16 pad; + __u32 xdiag_ino; + __u32 xdiag_show; + __u32 xdiag_cookie[2]; +}; + +struct xdp_diag_msg { + __u8 xdiag_family; + __u8 xdiag_type; + __u16 pad; + __u32 xdiag_ino; + __u32 xdiag_cookie[2]; +}; + +#define XDP_SHOW_INFO (1 << 0) /* Basic information */ +#define XDP_SHOW_RING_CFG (1 << 1) +#define XDP_SHOW_UMEM (1 << 2) +#define XDP_SHOW_MEMINFO (1 << 3) + +enum { + XDP_DIAG_NONE, + XDP_DIAG_INFO, + XDP_DIAG_UID, + XDP_DIAG_RX_RING, + XDP_DIAG_TX_RING, + XDP_DIAG_UMEM, + XDP_DIAG_UMEM_FILL_RING, + XDP_DIAG_UMEM_COMPLETION_RING, + XDP_DIAG_MEMINFO, + __XDP_DIAG_MAX, +}; + +#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1) + +struct xdp_diag_info { + __u32 ifindex; + __u32 queue_id; +}; + +struct xdp_diag_ring { + __u32 entries; /*num descs */ +}; + +#define XDP_DU_F_ZEROCOPY (1 << 0) + +struct xdp_diag_umem { + __u64 size; + __u32 id; + __u32 num_pages; + __u32 chunk_size; + __u32 headroom; + __u32 ifindex; + __u32 queue_id; + __u32 flags; + __u32 refs; +}; + +#endif /* _LINUX_XDP_DIAG_H */ diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index d13fd490b66d..6e73f0274e41 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -78,6 +78,7 @@ enum pvrdma_wr_opcode { PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD, PVRDMA_WR_BIND_MW, PVRDMA_WR_REG_SIG_MR, + PVRDMA_WR_ERROR, }; enum pvrdma_wc_status { diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h index 59a260712a56..2ca9164a79bf 100644 --- a/include/xen/arm/page-coherent.h +++ b/include/xen/arm/page-coherent.h @@ -1,17 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H -#define _ASM_ARM_XEN_PAGE_COHERENT_H - -#include -#include -#include - -static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev) -{ - if (dev && dev->archdata.dev_dma_ops) - return dev->archdata.dev_dma_ops; - return get_arch_dma_ops(NULL); -} +#ifndef _XEN_ARM_PAGE_COHERENT_H +#define _XEN_ARM_PAGE_COHERENT_H void __xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, @@ -21,87 +10,7 @@ void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, unsigned long attrs); void __xen_dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); - void __xen_dma_sync_single_for_device(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) -{ - return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); -} - -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) -{ - xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); -} - -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long page_pfn = page_to_xen_pfn(page); - unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); - unsigned long compound_pages = - (1<map_page(hwdev, page, offset, size, dir, attrs); - else - __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); -} - -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long pfn = PFN_DOWN(handle); - /* - * Dom0 is mapped 1:1, while the Linux page can be spanned accross - * multiple Xen page, it's not possible to have a mix of local and - * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a - * foreign mfn will always return false. If the page is local we can - * safely call the native dma_ops function, otherwise we call the xen - * specific function. - */ - if (pfn_valid(pfn)) { - if (xen_get_dma_ops(hwdev)->unmap_page) - xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); - } else - __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); -} - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - if (pfn_valid(pfn)) { - if (xen_get_dma_ops(hwdev)->sync_single_for_cpu) - xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); - } else - __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); -} - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - if (pfn_valid(pfn)) { - if (xen_get_dma_ops(hwdev)->sync_single_for_device) - xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); - } else - __xen_dma_sync_single_for_device(hwdev, handle, size, dir); -} - -#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */ +#endif /* _XEN_ARM_PAGE_COHERENT_H */ diff --git a/init/Kconfig b/init/Kconfig index d47cb77a220e..513fa544a134 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1124,6 +1124,7 @@ config LD_DEAD_CODE_DATA_ELIMINATION bool "Dead code and data elimination (EXPERIMENTAL)" depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION depends on EXPERT + depends on !(FUNCTION_TRACER && CC_IS_GCC && GCC_VERSION < 40800) depends on $(cc-option,-ffunction-sections -fdata-sections) depends on $(ld-option,--gc-sections) help diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index a2f53642592b..3d661f0606fe 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -157,7 +157,7 @@ * */ -#define BITS_PER_U64 (sizeof(u64) * BITS_PER_BYTE) +#define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2) #define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1) #define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK) #define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3) @@ -467,7 +467,7 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) return kind_ops[BTF_INFO_KIND(t->info)]; } -bool btf_name_offset_valid(const struct btf *btf, u32 offset) +static bool btf_name_offset_valid(const struct btf *btf, u32 offset) { return BTF_STR_OFFSET_VALID(offset) && offset < btf->hdr.str_len; @@ -525,7 +525,7 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) /* * Regular int is not a bit field and it must be either - * u8/u16/u32/u64. + * u8/u16/u32/u64 or __int128. */ static bool btf_type_int_is_regular(const struct btf_type *t) { @@ -538,7 +538,8 @@ static bool btf_type_int_is_regular(const struct btf_type *t) if (BITS_PER_BYTE_MASKED(nr_bits) || BTF_INT_OFFSET(int_data) || (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) && - nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) { + nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) && + nr_bytes != (2 * sizeof(u64)))) { return false; } @@ -1063,9 +1064,9 @@ static int btf_int_check_member(struct btf_verifier_env *env, nr_copy_bits = BTF_INT_BITS(int_data) + BITS_PER_BYTE_MASKED(struct_bits_off); - if (nr_copy_bits > BITS_PER_U64) { + if (nr_copy_bits > BITS_PER_U128) { btf_verifier_log_member(env, struct_type, member, - "nr_copy_bits exceeds 64"); + "nr_copy_bits exceeds 128"); return -EINVAL; } @@ -1119,9 +1120,9 @@ static int btf_int_check_kflag_member(struct btf_verifier_env *env, bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off); - if (nr_copy_bits > BITS_PER_U64) { + if (nr_copy_bits > BITS_PER_U128) { btf_verifier_log_member(env, struct_type, member, - "nr_copy_bits exceeds 64"); + "nr_copy_bits exceeds 128"); return -EINVAL; } @@ -1168,9 +1169,9 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env, nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data); - if (nr_bits > BITS_PER_U64) { + if (nr_bits > BITS_PER_U128) { btf_verifier_log_type(env, t, "nr_bits exceeds %zu", - BITS_PER_U64); + BITS_PER_U128); return -EINVAL; } @@ -1211,31 +1212,93 @@ static void btf_int_log(struct btf_verifier_env *env, btf_int_encoding_str(BTF_INT_ENCODING(int_data))); } +static void btf_int128_print(struct seq_file *m, void *data) +{ + /* data points to a __int128 number. + * Suppose + * int128_num = *(__int128 *)data; + * The below formulas shows what upper_num and lower_num represents: + * upper_num = int128_num >> 64; + * lower_num = int128_num & 0xffffffffFFFFFFFFULL; + */ + u64 upper_num, lower_num; + +#ifdef __BIG_ENDIAN_BITFIELD + upper_num = *(u64 *)data; + lower_num = *(u64 *)(data + 8); +#else + upper_num = *(u64 *)(data + 8); + lower_num = *(u64 *)data; +#endif + if (upper_num == 0) + seq_printf(m, "0x%llx", lower_num); + else + seq_printf(m, "0x%llx%016llx", upper_num, lower_num); +} + +static void btf_int128_shift(u64 *print_num, u16 left_shift_bits, + u16 right_shift_bits) +{ + u64 upper_num, lower_num; + +#ifdef __BIG_ENDIAN_BITFIELD + upper_num = print_num[0]; + lower_num = print_num[1]; +#else + upper_num = print_num[1]; + lower_num = print_num[0]; +#endif + + /* shake out un-needed bits by shift/or operations */ + if (left_shift_bits >= 64) { + upper_num = lower_num << (left_shift_bits - 64); + lower_num = 0; + } else { + upper_num = (upper_num << left_shift_bits) | + (lower_num >> (64 - left_shift_bits)); + lower_num = lower_num << left_shift_bits; + } + + if (right_shift_bits >= 64) { + lower_num = upper_num >> (right_shift_bits - 64); + upper_num = 0; + } else { + lower_num = (lower_num >> right_shift_bits) | + (upper_num << (64 - right_shift_bits)); + upper_num = upper_num >> right_shift_bits; + } + +#ifdef __BIG_ENDIAN_BITFIELD + print_num[0] = upper_num; + print_num[1] = lower_num; +#else + print_num[0] = lower_num; + print_num[1] = upper_num; +#endif +} + static void btf_bitfield_seq_show(void *data, u8 bits_offset, u8 nr_bits, struct seq_file *m) { u16 left_shift_bits, right_shift_bits; u8 nr_copy_bytes; u8 nr_copy_bits; - u64 print_num; + u64 print_num[2] = {}; nr_copy_bits = nr_bits + bits_offset; nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); - print_num = 0; - memcpy(&print_num, data, nr_copy_bytes); + memcpy(print_num, data, nr_copy_bytes); #ifdef __BIG_ENDIAN_BITFIELD left_shift_bits = bits_offset; #else - left_shift_bits = BITS_PER_U64 - nr_copy_bits; + left_shift_bits = BITS_PER_U128 - nr_copy_bits; #endif - right_shift_bits = BITS_PER_U64 - nr_bits; + right_shift_bits = BITS_PER_U128 - nr_bits; - print_num <<= left_shift_bits; - print_num >>= right_shift_bits; - - seq_printf(m, "0x%llx", print_num); + btf_int128_shift(print_num, left_shift_bits, right_shift_bits); + btf_int128_print(m, print_num); } @@ -1250,7 +1313,7 @@ static void btf_int_bits_seq_show(const struct btf *btf, /* * bits_offset is at most 7. - * BTF_INT_OFFSET() cannot exceed 64 bits. + * BTF_INT_OFFSET() cannot exceed 128 bits. */ total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); data += BITS_ROUNDDOWN_BYTES(total_bits_offset); @@ -1274,6 +1337,9 @@ static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, } switch (nr_bits) { + case 128: + btf_int128_print(m, data); + break; case 64: if (sign) seq_printf(m, "%lld", *(s64 *)data); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 9425c2fb872f..ab612fe9862f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -718,6 +718,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_trace_printk: if (capable(CAP_SYS_ADMIN)) return bpf_get_trace_printk_proto(); + /* fall through */ default: return NULL; } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f908b9356025..a7bcb23bee84 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -307,15 +307,16 @@ int bpf_prog_calc_tag(struct bpf_prog *fp) return 0; } -static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta, - u32 curr, const bool probe_pass) +static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, + s32 end_new, u32 curr, const bool probe_pass) { const s64 imm_min = S32_MIN, imm_max = S32_MAX; + s32 delta = end_new - end_old; s64 imm = insn->imm; - if (curr < pos && curr + imm + 1 > pos) + if (curr < pos && curr + imm + 1 >= end_old) imm += delta; - else if (curr > pos + delta && curr + imm + 1 <= pos + delta) + else if (curr >= end_new && curr + imm + 1 < end_new) imm -= delta; if (imm < imm_min || imm > imm_max) return -ERANGE; @@ -324,15 +325,16 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta, return 0; } -static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta, - u32 curr, const bool probe_pass) +static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, + s32 end_new, u32 curr, const bool probe_pass) { const s32 off_min = S16_MIN, off_max = S16_MAX; + s32 delta = end_new - end_old; s32 off = insn->off; - if (curr < pos && curr + off + 1 > pos) + if (curr < pos && curr + off + 1 >= end_old) off += delta; - else if (curr > pos + delta && curr + off + 1 <= pos + delta) + else if (curr >= end_new && curr + off + 1 < end_new) off -= delta; if (off < off_min || off > off_max) return -ERANGE; @@ -341,10 +343,10 @@ static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta, return 0; } -static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta, - const bool probe_pass) +static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old, + s32 end_new, const bool probe_pass) { - u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0); + u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0); struct bpf_insn *insn = prog->insnsi; int ret = 0; @@ -356,22 +358,23 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta, * do any other adjustments. Therefore skip the patchlet. */ if (probe_pass && i == pos) { - i += delta + 1; - insn++; + i = end_new; + insn = prog->insnsi + end_old; } code = insn->code; - if (BPF_CLASS(code) != BPF_JMP || + if ((BPF_CLASS(code) != BPF_JMP && + BPF_CLASS(code) != BPF_JMP32) || BPF_OP(code) == BPF_EXIT) continue; /* Adjust offset of jmps if we cross patch boundaries. */ if (BPF_OP(code) == BPF_CALL) { if (insn->src_reg != BPF_PSEUDO_CALL) continue; - ret = bpf_adj_delta_to_imm(insn, pos, delta, i, - probe_pass); + ret = bpf_adj_delta_to_imm(insn, pos, end_old, + end_new, i, probe_pass); } else { - ret = bpf_adj_delta_to_off(insn, pos, delta, i, - probe_pass); + ret = bpf_adj_delta_to_off(insn, pos, end_old, + end_new, i, probe_pass); } if (ret) break; @@ -421,7 +424,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * we afterwards may not fail anymore. */ if (insn_adj_cnt > cnt_max && - bpf_adj_branches(prog, off, insn_delta, true)) + bpf_adj_branches(prog, off, off + 1, off + len, true)) return NULL; /* Several new instructions need to be inserted. Make room @@ -453,13 +456,25 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, * the ship has sailed to reverse to the original state. An * overflow cannot happen at this point. */ - BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false)); + BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false)); bpf_adj_linfo(prog_adj, off, insn_delta); return prog_adj; } +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) +{ + /* Branch offsets can't overflow when program is shrinking, no need + * to call bpf_adj_branches(..., true) here + */ + memmove(prog->insnsi + off, prog->insnsi + off + cnt, + sizeof(struct bpf_insn) * (prog->len - off - cnt)); + prog->len -= cnt; + + return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false)); +} + void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) { int i; @@ -934,6 +949,27 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); break; + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + /* Accommodate for extra offset in case of a backjump. */ + off = from->off; + if (off < 0) + off -= 2; + *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX, + off); + break; + case BPF_LD | BPF_IMM | BPF_DW: *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); @@ -1130,6 +1166,31 @@ EXPORT_SYMBOL_GPL(__bpf_call_base); INSN_2(JMP, CALL), \ /* Exit instruction. */ \ INSN_2(JMP, EXIT), \ + /* 32-bit Jump instructions. */ \ + /* Register based. */ \ + INSN_3(JMP32, JEQ, X), \ + INSN_3(JMP32, JNE, X), \ + INSN_3(JMP32, JGT, X), \ + INSN_3(JMP32, JLT, X), \ + INSN_3(JMP32, JGE, X), \ + INSN_3(JMP32, JLE, X), \ + INSN_3(JMP32, JSGT, X), \ + INSN_3(JMP32, JSLT, X), \ + INSN_3(JMP32, JSGE, X), \ + INSN_3(JMP32, JSLE, X), \ + INSN_3(JMP32, JSET, X), \ + /* Immediate based. */ \ + INSN_3(JMP32, JEQ, K), \ + INSN_3(JMP32, JNE, K), \ + INSN_3(JMP32, JGT, K), \ + INSN_3(JMP32, JLT, K), \ + INSN_3(JMP32, JGE, K), \ + INSN_3(JMP32, JLE, K), \ + INSN_3(JMP32, JSGT, K), \ + INSN_3(JMP32, JSLT, K), \ + INSN_3(JMP32, JSGE, K), \ + INSN_3(JMP32, JSLE, K), \ + INSN_3(JMP32, JSET, K), \ /* Jump instructions. */ \ /* Register based. */ \ INSN_3(JMP, JEQ, X), \ @@ -1390,145 +1451,49 @@ select_insn: out: CONT; } - /* JMP */ JMP_JA: insn += insn->off; CONT; - JMP_JEQ_X: - if (DST == SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JEQ_K: - if (DST == IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JNE_X: - if (DST != SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JNE_K: - if (DST != IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JGT_X: - if (DST > SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JGT_K: - if (DST > IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JLT_X: - if (DST < SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JLT_K: - if (DST < IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JGE_X: - if (DST >= SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JGE_K: - if (DST >= IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JLE_X: - if (DST <= SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JLE_K: - if (DST <= IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSGT_X: - if (((s64) DST) > ((s64) SRC)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSGT_K: - if (((s64) DST) > ((s64) IMM)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSLT_X: - if (((s64) DST) < ((s64) SRC)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSLT_K: - if (((s64) DST) < ((s64) IMM)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSGE_X: - if (((s64) DST) >= ((s64) SRC)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSGE_K: - if (((s64) DST) >= ((s64) IMM)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSLE_X: - if (((s64) DST) <= ((s64) SRC)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSLE_K: - if (((s64) DST) <= ((s64) IMM)) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSET_X: - if (DST & SRC) { - insn += insn->off; - CONT_JMP; - } - CONT; - JMP_JSET_K: - if (DST & IMM) { - insn += insn->off; - CONT_JMP; - } - CONT; JMP_EXIT: return BPF_R0; - + /* JMP */ +#define COND_JMP(SIGN, OPCODE, CMP_OP) \ + JMP_##OPCODE##_X: \ + if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) { \ + insn += insn->off; \ + CONT_JMP; \ + } \ + CONT; \ + JMP32_##OPCODE##_X: \ + if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) { \ + insn += insn->off; \ + CONT_JMP; \ + } \ + CONT; \ + JMP_##OPCODE##_K: \ + if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) { \ + insn += insn->off; \ + CONT_JMP; \ + } \ + CONT; \ + JMP32_##OPCODE##_K: \ + if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) { \ + insn += insn->off; \ + CONT_JMP; \ + } \ + CONT; + COND_JMP(u, JEQ, ==) + COND_JMP(u, JNE, !=) + COND_JMP(u, JGT, >) + COND_JMP(u, JLT, <) + COND_JMP(u, JGE, >=) + COND_JMP(u, JLE, <=) + COND_JMP(u, JSET, &) + COND_JMP(s, JSGT, >) + COND_JMP(s, JSLT, <) + COND_JMP(s, JSGE, >=) + COND_JMP(s, JSLE, <=) +#undef COND_JMP /* STX and ST and LDX*/ #define LDST(SIZEOP, SIZE) \ STX_MEM_##SIZEOP: \ diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index d6b76377cb6e..de73f55e42fd 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -67,7 +67,7 @@ const char *const bpf_class_string[8] = { [BPF_STX] = "stx", [BPF_ALU] = "alu", [BPF_JMP] = "jmp", - [BPF_RET] = "BUG", + [BPF_JMP32] = "jmp32", [BPF_ALU64] = "alu64", }; @@ -136,23 +136,22 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, else print_bpf_end_insn(verbose, cbs->private_data, insn); } else if (BPF_OP(insn->code) == BPF_NEG) { - verbose(cbs->private_data, "(%02x) r%d = %s-r%d\n", - insn->code, insn->dst_reg, - class == BPF_ALU ? "(u32) " : "", + verbose(cbs->private_data, "(%02x) %c%d = -%c%d\n", + insn->code, class == BPF_ALU ? 'w' : 'r', + insn->dst_reg, class == BPF_ALU ? 'w' : 'r', insn->dst_reg); } else if (BPF_SRC(insn->code) == BPF_X) { - verbose(cbs->private_data, "(%02x) %sr%d %s %sr%d\n", - insn->code, class == BPF_ALU ? "(u32) " : "", + verbose(cbs->private_data, "(%02x) %c%d %s %c%d\n", + insn->code, class == BPF_ALU ? 'w' : 'r', insn->dst_reg, bpf_alu_string[BPF_OP(insn->code) >> 4], - class == BPF_ALU ? "(u32) " : "", + class == BPF_ALU ? 'w' : 'r', insn->src_reg); } else { - verbose(cbs->private_data, "(%02x) %sr%d %s %s%d\n", - insn->code, class == BPF_ALU ? "(u32) " : "", + verbose(cbs->private_data, "(%02x) %c%d %s %d\n", + insn->code, class == BPF_ALU ? 'w' : 'r', insn->dst_reg, bpf_alu_string[BPF_OP(insn->code) >> 4], - class == BPF_ALU ? "(u32) " : "", insn->imm); } } else if (class == BPF_STX) { @@ -220,7 +219,7 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, verbose(cbs->private_data, "BUG_ld_%02x\n", insn->code); return; } - } else if (class == BPF_JMP) { + } else if (class == BPF_JMP32 || class == BPF_JMP) { u8 opcode = BPF_OP(insn->code); if (opcode == BPF_CALL) { @@ -244,13 +243,18 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, } else if (insn->code == (BPF_JMP | BPF_EXIT)) { verbose(cbs->private_data, "(%02x) exit\n", insn->code); } else if (BPF_SRC(insn->code) == BPF_X) { - verbose(cbs->private_data, "(%02x) if r%d %s r%d goto pc%+d\n", - insn->code, insn->dst_reg, + verbose(cbs->private_data, + "(%02x) if %c%d %s %c%d goto pc%+d\n", + insn->code, class == BPF_JMP32 ? 'w' : 'r', + insn->dst_reg, bpf_jmp_string[BPF_OP(insn->code) >> 4], + class == BPF_JMP32 ? 'w' : 'r', insn->src_reg, insn->off); } else { - verbose(cbs->private_data, "(%02x) if r%d %s 0x%x goto pc%+d\n", - insn->code, insn->dst_reg, + verbose(cbs->private_data, + "(%02x) if %c%d %s 0x%x goto pc%+d\n", + insn->code, class == BPF_JMP32 ? 'w' : 'r', + insn->dst_reg, bpf_jmp_string[BPF_OP(insn->code) >> 4], insn->imm, insn->off); } diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 99d243e1ad6e..52378d3e34b3 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -12,6 +12,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) { struct bpf_map *inner_map, *inner_map_meta; + u32 inner_map_meta_size; struct fd f; f = fdget(inner_map_ufd); @@ -36,7 +37,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) return ERR_PTR(-EINVAL); } - inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER); + inner_map_meta_size = sizeof(*inner_map_meta); + /* In some cases verifier needs to access beyond just base map. */ + if (inner_map->ops == &array_map_ops) + inner_map_meta_size = sizeof(struct bpf_array); + + inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER); if (!inner_map_meta) { fdput(f); return ERR_PTR(-ENOMEM); @@ -46,9 +52,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->key_size = inner_map->key_size; inner_map_meta->value_size = inner_map->value_size; inner_map_meta->map_flags = inner_map->map_flags; - inner_map_meta->ops = inner_map->ops; inner_map_meta->max_entries = inner_map->max_entries; + /* Misc members not needed in bpf_map_meta_equal() check. */ + inner_map_meta->ops = inner_map->ops; + if (inner_map->ops == &array_map_ops) { + inner_map_meta->unpriv_array = inner_map->unpriv_array; + container_of(inner_map_meta, struct bpf_array, map)->index_mask = + container_of(inner_map, struct bpf_array, map)->index_mask; + } + fdput(f); return inner_map_meta; } diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 54cf2b9c44a4..39dba8c90331 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -173,6 +173,41 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env) return ret; } +void +bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off, + struct bpf_insn *insn) +{ + const struct bpf_prog_offload_ops *ops; + struct bpf_prog_offload *offload; + int ret = -EOPNOTSUPP; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) { + ops = offload->offdev->ops; + if (!offload->opt_failed && ops->replace_insn) + ret = ops->replace_insn(env, off, insn); + offload->opt_failed |= ret; + } + up_read(&bpf_devs_lock); +} + +void +bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) +{ + struct bpf_prog_offload *offload; + int ret = -EOPNOTSUPP; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) { + if (!offload->opt_failed && offload->offdev->ops->remove_insns) + ret = offload->offdev->ops->remove_insns(env, off, cnt); + offload->opt_failed |= ret; + } + up_read(&bpf_devs_lock); +} + static void __bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_prog_offload *offload = prog->aux->offload; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index d9e2483669d0..d43b14535827 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -180,11 +180,14 @@ static inline int stack_map_parse_build_id(void *page_addr, if (nhdr->n_type == BPF_BUILD_ID && nhdr->n_namesz == sizeof("GNU") && - nhdr->n_descsz == BPF_BUILD_ID_SIZE) { + nhdr->n_descsz > 0 && + nhdr->n_descsz <= BPF_BUILD_ID_SIZE) { memcpy(build_id, note_start + note_offs + ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), - BPF_BUILD_ID_SIZE); + nhdr->n_descsz); + memset(build_id + nhdr->n_descsz, 0, + BPF_BUILD_ID_SIZE - nhdr->n_descsz); return 0; } new_offs = note_offs + sizeof(Elf32_Nhdr) + @@ -311,6 +314,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, for (i = 0; i < trace_nr; i++) { id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].ip = ips[i]; + memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); } return; } @@ -321,6 +325,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, /* per entry fall back to ips */ id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].ip = ips[i]; + memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); continue; } id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 56674a7c3778..8c1c21cd50b4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1095,7 +1095,7 @@ static int check_subprogs(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++) { u8 code = insn[i].code; - if (BPF_CLASS(code) != BPF_JMP) + if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) goto next; if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) goto next; @@ -4031,11 +4031,50 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, * 0 - branch will not be taken and fall-through to next insn * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10] */ -static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) +static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, + bool is_jmp32) { + struct bpf_reg_state reg_lo; + s64 sval; + if (__is_pointer_value(false, reg)) return -1; + if (is_jmp32) { + reg_lo = *reg; + reg = ®_lo; + /* For JMP32, only low 32 bits are compared, coerce_reg_to_size + * could truncate high bits and update umin/umax according to + * information of low bits. + */ + coerce_reg_to_size(reg, 4); + /* smin/smax need special handling. For example, after coerce, + * if smin_value is 0x00000000ffffffffLL, the value is -1 when + * used as operand to JMP32. It is a negative number from s32's + * point of view, while it is a positive number when seen as + * s64. The smin/smax are kept as s64, therefore, when used with + * JMP32, they need to be transformed into s32, then sign + * extended back to s64. + * + * Also, smin/smax were copied from umin/umax. If umin/umax has + * different sign bit, then min/max relationship doesn't + * maintain after casting into s32, for this case, set smin/smax + * to safest range. + */ + if ((reg->umax_value ^ reg->umin_value) & + (1ULL << 31)) { + reg->smin_value = S32_MIN; + reg->smax_value = S32_MAX; + } + reg->smin_value = (s64)(s32)reg->smin_value; + reg->smax_value = (s64)(s32)reg->smax_value; + + val = (u32)val; + sval = (s64)(s32)val; + } else { + sval = (s64)val; + } + switch (opcode) { case BPF_JEQ: if (tnum_is_const(reg->var_off)) @@ -4058,9 +4097,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) return 0; break; case BPF_JSGT: - if (reg->smin_value > (s64)val) + if (reg->smin_value > sval) return 1; - else if (reg->smax_value < (s64)val) + else if (reg->smax_value < sval) return 0; break; case BPF_JLT: @@ -4070,9 +4109,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) return 0; break; case BPF_JSLT: - if (reg->smax_value < (s64)val) + if (reg->smax_value < sval) return 1; - else if (reg->smin_value >= (s64)val) + else if (reg->smin_value >= sval) return 0; break; case BPF_JGE: @@ -4082,9 +4121,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) return 0; break; case BPF_JSGE: - if (reg->smin_value >= (s64)val) + if (reg->smin_value >= sval) return 1; - else if (reg->smax_value < (s64)val) + else if (reg->smax_value < sval) return 0; break; case BPF_JLE: @@ -4094,9 +4133,9 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) return 0; break; case BPF_JSLE: - if (reg->smax_value <= (s64)val) + if (reg->smax_value <= sval) return 1; - else if (reg->smin_value > (s64)val) + else if (reg->smin_value > sval) return 0; break; } @@ -4104,6 +4143,29 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) return -1; } +/* Generate min value of the high 32-bit from TNUM info. */ +static u64 gen_hi_min(struct tnum var) +{ + return var.value & ~0xffffffffULL; +} + +/* Generate max value of the high 32-bit from TNUM info. */ +static u64 gen_hi_max(struct tnum var) +{ + return (var.value | var.mask) & ~0xffffffffULL; +} + +/* Return true if VAL is compared with a s64 sign extended from s32, and they + * are with the same signedness. + */ +static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg) +{ + return ((s32)sval >= 0 && + reg->smin_value >= 0 && reg->smax_value <= S32_MAX) || + ((s32)sval < 0 && + reg->smax_value <= 0 && reg->smin_value >= S32_MIN); +} + /* Adjusts the register min/max values in the case that the dst_reg is the * variable register that we are working on, and src_reg is a constant or we're * simply doing a BPF_K check. @@ -4111,8 +4173,10 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) */ static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, - u8 opcode) + u8 opcode, bool is_jmp32) { + s64 sval; + /* If the dst_reg is a pointer, we can't learn anything about its * variable offset from the compare (unless src_reg were a pointer into * the same object, but we don't bother with that. @@ -4122,19 +4186,31 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, if (__is_pointer_value(false, false_reg)) return; + val = is_jmp32 ? (u32)val : val; + sval = is_jmp32 ? (s64)(s32)val : (s64)val; + switch (opcode) { case BPF_JEQ: - /* If this is false then we know nothing Jon Snow, but if it is - * true then we know for sure. - */ - __mark_reg_known(true_reg, val); - break; case BPF_JNE: - /* If this is true we know nothing Jon Snow, but if it is false - * we know the value for sure; + { + struct bpf_reg_state *reg = + opcode == BPF_JEQ ? true_reg : false_reg; + + /* For BPF_JEQ, if this is false we know nothing Jon Snow, but + * if it is true we know the value for sure. Likewise for + * BPF_JNE. */ - __mark_reg_known(false_reg, val); + if (is_jmp32) { + u64 old_v = reg->var_off.value; + u64 hi_mask = ~0xffffffffULL; + + reg->var_off.value = (old_v & hi_mask) | val; + reg->var_off.mask &= hi_mask; + } else { + __mark_reg_known(reg, val); + } break; + } case BPF_JSET: false_reg->var_off = tnum_and(false_reg->var_off, tnum_const(~val)); @@ -4142,38 +4218,61 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, true_reg->var_off = tnum_or(true_reg->var_off, tnum_const(val)); break; - case BPF_JGT: - false_reg->umax_value = min(false_reg->umax_value, val); - true_reg->umin_value = max(true_reg->umin_value, val + 1); - break; - case BPF_JSGT: - false_reg->smax_value = min_t(s64, false_reg->smax_value, val); - true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1); - break; - case BPF_JLT: - false_reg->umin_value = max(false_reg->umin_value, val); - true_reg->umax_value = min(true_reg->umax_value, val - 1); - break; - case BPF_JSLT: - false_reg->smin_value = max_t(s64, false_reg->smin_value, val); - true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1); - break; case BPF_JGE: - false_reg->umax_value = min(false_reg->umax_value, val - 1); - true_reg->umin_value = max(true_reg->umin_value, val); + case BPF_JGT: + { + u64 false_umax = opcode == BPF_JGT ? val : val - 1; + u64 true_umin = opcode == BPF_JGT ? val + 1 : val; + + if (is_jmp32) { + false_umax += gen_hi_max(false_reg->var_off); + true_umin += gen_hi_min(true_reg->var_off); + } + false_reg->umax_value = min(false_reg->umax_value, false_umax); + true_reg->umin_value = max(true_reg->umin_value, true_umin); break; + } case BPF_JSGE: - false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1); - true_reg->smin_value = max_t(s64, true_reg->smin_value, val); + case BPF_JSGT: + { + s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; + s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; + + /* If the full s64 was not sign-extended from s32 then don't + * deduct further info. + */ + if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) + break; + false_reg->smax_value = min(false_reg->smax_value, false_smax); + true_reg->smin_value = max(true_reg->smin_value, true_smin); break; + } case BPF_JLE: - false_reg->umin_value = max(false_reg->umin_value, val + 1); - true_reg->umax_value = min(true_reg->umax_value, val); + case BPF_JLT: + { + u64 false_umin = opcode == BPF_JLT ? val : val + 1; + u64 true_umax = opcode == BPF_JLT ? val - 1 : val; + + if (is_jmp32) { + false_umin += gen_hi_min(false_reg->var_off); + true_umax += gen_hi_max(true_reg->var_off); + } + false_reg->umin_value = max(false_reg->umin_value, false_umin); + true_reg->umax_value = min(true_reg->umax_value, true_umax); break; + } case BPF_JSLE: - false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1); - true_reg->smax_value = min_t(s64, true_reg->smax_value, val); + case BPF_JSLT: + { + s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; + s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval; + + if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) + break; + false_reg->smin_value = max(false_reg->smin_value, false_smin); + true_reg->smax_value = min(true_reg->smax_value, true_smax); break; + } default: break; } @@ -4196,24 +4295,34 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, */ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, - u8 opcode) + u8 opcode, bool is_jmp32) { + s64 sval; + if (__is_pointer_value(false, false_reg)) return; + val = is_jmp32 ? (u32)val : val; + sval = is_jmp32 ? (s64)(s32)val : (s64)val; + switch (opcode) { case BPF_JEQ: - /* If this is false then we know nothing Jon Snow, but if it is - * true then we know for sure. - */ - __mark_reg_known(true_reg, val); - break; case BPF_JNE: - /* If this is true we know nothing Jon Snow, but if it is false - * we know the value for sure; - */ - __mark_reg_known(false_reg, val); + { + struct bpf_reg_state *reg = + opcode == BPF_JEQ ? true_reg : false_reg; + + if (is_jmp32) { + u64 old_v = reg->var_off.value; + u64 hi_mask = ~0xffffffffULL; + + reg->var_off.value = (old_v & hi_mask) | val; + reg->var_off.mask &= hi_mask; + } else { + __mark_reg_known(reg, val); + } break; + } case BPF_JSET: false_reg->var_off = tnum_and(false_reg->var_off, tnum_const(~val)); @@ -4221,38 +4330,58 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, true_reg->var_off = tnum_or(true_reg->var_off, tnum_const(val)); break; - case BPF_JGT: - true_reg->umax_value = min(true_reg->umax_value, val - 1); - false_reg->umin_value = max(false_reg->umin_value, val); - break; - case BPF_JSGT: - true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1); - false_reg->smin_value = max_t(s64, false_reg->smin_value, val); - break; - case BPF_JLT: - true_reg->umin_value = max(true_reg->umin_value, val + 1); - false_reg->umax_value = min(false_reg->umax_value, val); - break; - case BPF_JSLT: - true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1); - false_reg->smax_value = min_t(s64, false_reg->smax_value, val); - break; case BPF_JGE: - true_reg->umax_value = min(true_reg->umax_value, val); - false_reg->umin_value = max(false_reg->umin_value, val + 1); + case BPF_JGT: + { + u64 false_umin = opcode == BPF_JGT ? val : val + 1; + u64 true_umax = opcode == BPF_JGT ? val - 1 : val; + + if (is_jmp32) { + false_umin += gen_hi_min(false_reg->var_off); + true_umax += gen_hi_max(true_reg->var_off); + } + false_reg->umin_value = max(false_reg->umin_value, false_umin); + true_reg->umax_value = min(true_reg->umax_value, true_umax); break; + } case BPF_JSGE: - true_reg->smax_value = min_t(s64, true_reg->smax_value, val); - false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1); + case BPF_JSGT: + { + s64 false_smin = opcode == BPF_JSGT ? sval : sval + 1; + s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval; + + if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) + break; + false_reg->smin_value = max(false_reg->smin_value, false_smin); + true_reg->smax_value = min(true_reg->smax_value, true_smax); break; + } case BPF_JLE: - true_reg->umin_value = max(true_reg->umin_value, val); - false_reg->umax_value = min(false_reg->umax_value, val - 1); + case BPF_JLT: + { + u64 false_umax = opcode == BPF_JLT ? val : val - 1; + u64 true_umin = opcode == BPF_JLT ? val + 1 : val; + + if (is_jmp32) { + false_umax += gen_hi_max(false_reg->var_off); + true_umin += gen_hi_min(true_reg->var_off); + } + false_reg->umax_value = min(false_reg->umax_value, false_umax); + true_reg->umin_value = max(true_reg->umin_value, true_umin); break; + } case BPF_JSLE: - true_reg->smin_value = max_t(s64, true_reg->smin_value, val); - false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1); + case BPF_JSLT: + { + s64 false_smax = opcode == BPF_JSLT ? sval : sval - 1; + s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval; + + if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg)) + break; + false_reg->smax_value = min(false_reg->smax_value, false_smax); + true_reg->smin_value = max(true_reg->smin_value, true_smin); break; + } default: break; } @@ -4390,6 +4519,10 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, if (BPF_SRC(insn->code) != BPF_X) return false; + /* Pointers are always 64-bit. */ + if (BPF_CLASS(insn->code) == BPF_JMP32) + return false; + switch (BPF_OP(insn->code)) { case BPF_JGT: if ((dst_reg->type == PTR_TO_PACKET && @@ -4482,16 +4615,18 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; struct bpf_reg_state *dst_reg, *other_branch_regs; u8 opcode = BPF_OP(insn->code); + bool is_jmp32; int err; - if (opcode > BPF_JSLE) { - verbose(env, "invalid BPF_JMP opcode %x\n", opcode); + /* Only conditional jumps are expected to reach here. */ + if (opcode == BPF_JA || opcode > BPF_JSLE) { + verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode); return -EINVAL; } if (BPF_SRC(insn->code) == BPF_X) { if (insn->imm != 0) { - verbose(env, "BPF_JMP uses reserved fields\n"); + verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); return -EINVAL; } @@ -4507,7 +4642,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } } else { if (insn->src_reg != BPF_REG_0) { - verbose(env, "BPF_JMP uses reserved fields\n"); + verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); return -EINVAL; } } @@ -4518,9 +4653,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return err; dst_reg = ®s[insn->dst_reg]; + is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; if (BPF_SRC(insn->code) == BPF_K) { - int pred = is_branch_taken(dst_reg, insn->imm, opcode); + int pred = is_branch_taken(dst_reg, insn->imm, opcode, + is_jmp32); if (pred == 1) { /* only follow the goto, ignore fall-through */ @@ -4548,30 +4685,51 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, * comparable. */ if (BPF_SRC(insn->code) == BPF_X) { + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; + struct bpf_reg_state lo_reg0 = *dst_reg; + struct bpf_reg_state lo_reg1 = *src_reg; + struct bpf_reg_state *src_lo, *dst_lo; + + dst_lo = &lo_reg0; + src_lo = &lo_reg1; + coerce_reg_to_size(dst_lo, 4); + coerce_reg_to_size(src_lo, 4); + if (dst_reg->type == SCALAR_VALUE && - regs[insn->src_reg].type == SCALAR_VALUE) { - if (tnum_is_const(regs[insn->src_reg].var_off)) + src_reg->type == SCALAR_VALUE) { + if (tnum_is_const(src_reg->var_off) || + (is_jmp32 && tnum_is_const(src_lo->var_off))) reg_set_min_max(&other_branch_regs[insn->dst_reg], - dst_reg, regs[insn->src_reg].var_off.value, - opcode); - else if (tnum_is_const(dst_reg->var_off)) + dst_reg, + is_jmp32 + ? src_lo->var_off.value + : src_reg->var_off.value, + opcode, is_jmp32); + else if (tnum_is_const(dst_reg->var_off) || + (is_jmp32 && tnum_is_const(dst_lo->var_off))) reg_set_min_max_inv(&other_branch_regs[insn->src_reg], - ®s[insn->src_reg], - dst_reg->var_off.value, opcode); - else if (opcode == BPF_JEQ || opcode == BPF_JNE) + src_reg, + is_jmp32 + ? dst_lo->var_off.value + : dst_reg->var_off.value, + opcode, is_jmp32); + else if (!is_jmp32 && + (opcode == BPF_JEQ || opcode == BPF_JNE)) /* Comparing for equality, we can combine knowledge */ reg_combine_min_max(&other_branch_regs[insn->src_reg], &other_branch_regs[insn->dst_reg], - ®s[insn->src_reg], - ®s[insn->dst_reg], opcode); + src_reg, dst_reg, opcode); } } else if (dst_reg->type == SCALAR_VALUE) { reg_set_min_max(&other_branch_regs[insn->dst_reg], - dst_reg, insn->imm, opcode); + dst_reg, insn->imm, opcode, is_jmp32); } - /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */ - if (BPF_SRC(insn->code) == BPF_K && + /* detect if R == 0 where R is returned from bpf_map_lookup_elem(). + * NOTE: these optimizations below are related with pointer comparison + * which will never be JMP32. + */ + if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && reg_type_may_be_null(dst_reg->type)) { /* Mark all identical registers in each branch as either @@ -4900,7 +5058,8 @@ peek_stack: goto check_state; t = insn_stack[cur_stack - 1]; - if (BPF_CLASS(insns[t].code) == BPF_JMP) { + if (BPF_CLASS(insns[t].code) == BPF_JMP || + BPF_CLASS(insns[t].code) == BPF_JMP32) { u8 opcode = BPF_OP(insns[t].code); if (opcode == BPF_EXIT) { @@ -4997,13 +5156,14 @@ static int check_btf_func(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr) { - u32 i, nfuncs, urec_size, min_size, prev_offset; + u32 i, nfuncs, urec_size, min_size; u32 krec_size = sizeof(struct bpf_func_info); struct bpf_func_info *krecord; const struct btf_type *type; struct bpf_prog *prog; const struct btf *btf; void __user *urecord; + u32 prev_offset = 0; int ret = 0; nfuncs = attr->func_info_cnt; @@ -6055,7 +6215,7 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; - } else if (class == BPF_JMP) { + } else if (class == BPF_JMP || class == BPF_JMP32) { u8 opcode = BPF_OP(insn->code); if (opcode == BPF_CALL) { @@ -6063,7 +6223,8 @@ static int do_check(struct bpf_verifier_env *env) insn->off != 0 || (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL) || - insn->dst_reg != BPF_REG_0) { + insn->dst_reg != BPF_REG_0 || + class == BPF_JMP32) { verbose(env, "BPF_CALL uses reserved fields\n"); return -EINVAL; } @@ -6079,7 +6240,8 @@ static int do_check(struct bpf_verifier_env *env) if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || insn->src_reg != BPF_REG_0 || - insn->dst_reg != BPF_REG_0) { + insn->dst_reg != BPF_REG_0 || + class == BPF_JMP32) { verbose(env, "BPF_JA uses reserved fields\n"); return -EINVAL; } @@ -6091,7 +6253,8 @@ static int do_check(struct bpf_verifier_env *env) if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || insn->src_reg != BPF_REG_0 || - insn->dst_reg != BPF_REG_0) { + insn->dst_reg != BPF_REG_0 || + class == BPF_JMP32) { verbose(env, "BPF_EXIT uses reserved fields\n"); return -EINVAL; } @@ -6431,6 +6594,153 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of return new_prog; } +static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, + u32 off, u32 cnt) +{ + int i, j; + + /* find first prog starting at or after off (first to remove) */ + for (i = 0; i < env->subprog_cnt; i++) + if (env->subprog_info[i].start >= off) + break; + /* find first prog starting at or after off + cnt (first to stay) */ + for (j = i; j < env->subprog_cnt; j++) + if (env->subprog_info[j].start >= off + cnt) + break; + /* if j doesn't start exactly at off + cnt, we are just removing + * the front of previous prog + */ + if (env->subprog_info[j].start != off + cnt) + j--; + + if (j > i) { + struct bpf_prog_aux *aux = env->prog->aux; + int move; + + /* move fake 'exit' subprog as well */ + move = env->subprog_cnt + 1 - j; + + memmove(env->subprog_info + i, + env->subprog_info + j, + sizeof(*env->subprog_info) * move); + env->subprog_cnt -= j - i; + + /* remove func_info */ + if (aux->func_info) { + move = aux->func_info_cnt - j; + + memmove(aux->func_info + i, + aux->func_info + j, + sizeof(*aux->func_info) * move); + aux->func_info_cnt -= j - i; + /* func_info->insn_off is set after all code rewrites, + * in adjust_btf_func() - no need to adjust + */ + } + } else { + /* convert i from "first prog to remove" to "first to adjust" */ + if (env->subprog_info[i].start == off) + i++; + } + + /* update fake 'exit' subprog as well */ + for (; i <= env->subprog_cnt; i++) + env->subprog_info[i].start -= cnt; + + return 0; +} + +static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, + u32 cnt) +{ + struct bpf_prog *prog = env->prog; + u32 i, l_off, l_cnt, nr_linfo; + struct bpf_line_info *linfo; + + nr_linfo = prog->aux->nr_linfo; + if (!nr_linfo) + return 0; + + linfo = prog->aux->linfo; + + /* find first line info to remove, count lines to be removed */ + for (i = 0; i < nr_linfo; i++) + if (linfo[i].insn_off >= off) + break; + + l_off = i; + l_cnt = 0; + for (; i < nr_linfo; i++) + if (linfo[i].insn_off < off + cnt) + l_cnt++; + else + break; + + /* First live insn doesn't match first live linfo, it needs to "inherit" + * last removed linfo. prog is already modified, so prog->len == off + * means no live instructions after (tail of the program was removed). + */ + if (prog->len != off && l_cnt && + (i == nr_linfo || linfo[i].insn_off != off + cnt)) { + l_cnt--; + linfo[--i].insn_off = off + cnt; + } + + /* remove the line info which refer to the removed instructions */ + if (l_cnt) { + memmove(linfo + l_off, linfo + i, + sizeof(*linfo) * (nr_linfo - i)); + + prog->aux->nr_linfo -= l_cnt; + nr_linfo = prog->aux->nr_linfo; + } + + /* pull all linfo[i].insn_off >= off + cnt in by cnt */ + for (i = l_off; i < nr_linfo; i++) + linfo[i].insn_off -= cnt; + + /* fix up all subprogs (incl. 'exit') which start >= off */ + for (i = 0; i <= env->subprog_cnt; i++) + if (env->subprog_info[i].linfo_idx > l_off) { + /* program may have started in the removed region but + * may not be fully removed + */ + if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) + env->subprog_info[i].linfo_idx -= l_cnt; + else + env->subprog_info[i].linfo_idx = l_off; + } + + return 0; +} + +static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + unsigned int orig_prog_len = env->prog->len; + int err; + + if (bpf_prog_is_dev_bound(env->prog->aux)) + bpf_prog_offload_remove_insns(env, off, cnt); + + err = bpf_remove_insns(env->prog, off, cnt); + if (err) + return err; + + err = adjust_subprog_starts_after_remove(env, off, cnt); + if (err) + return err; + + err = bpf_adj_linfo_after_remove(env, off, cnt); + if (err) + return err; + + memmove(aux_data + off, aux_data + off + cnt, + sizeof(*aux_data) * (orig_prog_len - off - cnt)); + + return 0; +} + /* The verifier does more data flow analysis than llvm and will not * explore branches that are dead at run time. Malicious programs can * have dead code too. Therefore replace all dead at-run-time code @@ -6457,6 +6767,91 @@ static void sanitize_dead_code(struct bpf_verifier_env *env) } } +static bool insn_is_cond_jump(u8 code) +{ + u8 op; + + if (BPF_CLASS(code) == BPF_JMP32) + return true; + + if (BPF_CLASS(code) != BPF_JMP) + return false; + + op = BPF_OP(code); + return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL; +} + +static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); + struct bpf_insn *insn = env->prog->insnsi; + const int insn_cnt = env->prog->len; + int i; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (!insn_is_cond_jump(insn->code)) + continue; + + if (!aux_data[i + 1].seen) + ja.off = insn->off; + else if (!aux_data[i + 1 + insn->off].seen) + ja.off = 0; + else + continue; + + if (bpf_prog_is_dev_bound(env->prog->aux)) + bpf_prog_offload_replace_insn(env, i, &ja); + + memcpy(insn, &ja, sizeof(ja)); + } +} + +static int opt_remove_dead_code(struct bpf_verifier_env *env) +{ + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; + int insn_cnt = env->prog->len; + int i, err; + + for (i = 0; i < insn_cnt; i++) { + int j; + + j = 0; + while (i + j < insn_cnt && !aux_data[i + j].seen) + j++; + if (!j) + continue; + + err = verifier_remove_insns(env, i, j); + if (err) + return err; + insn_cnt = env->prog->len; + } + + return 0; +} + +static int opt_remove_nops(struct bpf_verifier_env *env) +{ + const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + int i, err; + + for (i = 0; i < insn_cnt; i++) { + if (memcmp(&insn[i], &ja, sizeof(ja))) + continue; + + err = verifier_remove_insns(env, i, 1); + if (err) + return err; + insn_cnt--; + i--; + } + + return 0; +} + /* convert load instructions that access fields of a context type into a * sequence of instructions that access fields of the underlying structure: * struct __sk_buff -> struct sk_buff @@ -7147,7 +7542,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, { struct bpf_verifier_env *env; struct bpf_verifier_log *log; - int ret = -EINVAL; + int i, len, ret = -EINVAL; + bool is_priv; /* no program is valid */ if (ARRAY_SIZE(bpf_verifier_ops) == 0) @@ -7161,12 +7557,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, return -ENOMEM; log = &env->log; + len = (*prog)->len; env->insn_aux_data = - vzalloc(array_size(sizeof(struct bpf_insn_aux_data), - (*prog)->len)); + vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); ret = -ENOMEM; if (!env->insn_aux_data) goto err_free_env; + for (i = 0; i < len; i++) + env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; @@ -7194,6 +7592,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) env->strict_alignment = false; + is_priv = capable(CAP_SYS_ADMIN); + env->allow_ptr_leaks = is_priv; + ret = replace_map_fd_with_map_ptr(env); if (ret < 0) goto skip_full_check; @@ -7211,8 +7612,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (!env->explored_states) goto skip_full_check; - env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); - ret = check_subprogs(env); if (ret < 0) goto skip_full_check; @@ -7242,8 +7641,17 @@ skip_full_check: ret = check_max_stack_depth(env); /* instruction rewrites happen after this point */ - if (ret == 0) - sanitize_dead_code(env); + if (is_priv) { + if (ret == 0) + opt_hard_wire_dead_code_branches(env); + if (ret == 0) + ret = opt_remove_dead_code(env); + if (ret == 0) + ret = opt_remove_nops(env); + } else { + if (ret == 0) + sanitize_dead_code(env); + } if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index d6361776dc5c..1fb6fd68b9c7 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -378,6 +378,8 @@ void __init swiotlb_exit(void) memblock_free_late(io_tlb_start, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); } + io_tlb_start = 0; + io_tlb_end = 0; io_tlb_nslabs = 0; max_segment = 0; } diff --git a/kernel/exit.c b/kernel/exit.c index 284f2fe9a293..3fb7be001964 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -307,7 +307,7 @@ void rcuwait_wake_up(struct rcuwait *w) * MB (A) MB (B) * [L] cond [L] tsk */ - smp_rmb(); /* (B) */ + smp_mb(); /* (B) */ /* * Avoid using task_rcu_dereference() magic as long as we are careful, diff --git a/kernel/futex.c b/kernel/futex.c index be3bff2315ff..fdd312da0992 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1452,11 +1452,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) return; - /* - * Queue the task for later wakeup for after we've released - * the hb->lock. wake_q_add() grabs reference to p. - */ - wake_q_add(wake_q, p); + get_task_struct(p); __unqueue_futex(q); /* * The waiting task can free the futex_q as soon as q->lock_ptr = NULL @@ -1466,6 +1462,13 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) * plist_del in __unqueue_futex(). */ smp_store_release(&q->lock_ptr, NULL); + + /* + * Queue the task for later wakeup for after we've released + * the hb->lock. wake_q_add() grabs reference to p. + */ + wake_q_add(wake_q, p); + put_task_struct(p); } /* diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index ee062b7939d3..ef8ad36cadcf 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -457,7 +457,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, /* Validate affinity mask(s) */ if (affinity) { - for (i = 0; i < cnt; i++, i++) { + for (i = 0; i < cnt; i++) { if (cpumask_empty(&affinity[i].mask)) return -EINVAL; } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index a4888ce4667a..84b54a17b95d 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -393,6 +393,9 @@ int irq_setup_affinity(struct irq_desc *desc) } cpumask_and(&mask, cpu_online_mask, set); + if (cpumask_empty(&mask)) + cpumask_copy(&mask, cpu_online_mask); + if (node != NUMA_NO_NODE) { const struct cpumask *nodemask = cpumask_of_node(node); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 09b180063ee1..50d9af615dc4 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -198,15 +198,22 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, woken++; tsk = waiter->task; - wake_q_add(wake_q, tsk); + get_task_struct(tsk); list_del(&waiter->list); /* - * Ensure that the last operation is setting the reader + * Ensure calling get_task_struct() before setting the reader * waiter to nil such that rwsem_down_read_failed() cannot * race with do_exit() by always holding a reference count * to the task to wakeup. */ smp_store_release(&waiter->task, NULL); + /* + * Ensure issuing the wakeup (either by us or someone else) + * after setting the reader waiter to nil. + */ + wake_q_add(wake_q, tsk); + /* wake_q_add() already take the task ref */ + put_task_struct(tsk); } adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a674c7db2f29..d8d76a65cfdd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -396,6 +396,18 @@ static bool set_nr_if_polling(struct task_struct *p) #endif #endif +/** + * wake_q_add() - queue a wakeup for 'later' waking. + * @head: the wake_q_head to add @task to + * @task: the task to queue for 'later' wakeup + * + * Queue a task for later wakeup, most likely by the wake_up_q() call in the + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come + * instantly. + * + * This function must be used as-if it were wake_up_process(); IOW the task + * must be ready to be woken at this location. + */ void wake_q_add(struct wake_q_head *head, struct task_struct *task) { struct wake_q_node *node = &task->wake_q; @@ -405,10 +417,11 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) * its already queued (either by us or someone else) and will get the * wakeup due to that. * - * This cmpxchg() executes a full barrier, which pairs with the full - * barrier executed by the wakeup in wake_up_q(). + * In order to ensure that a pending wakeup will observe our pending + * state, even in the failed case, an explicit smp_mb() must be used. */ - if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL)) + smp_mb__before_atomic(); + if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)) return; get_task_struct(task); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index d7f538847b84..e815781ed751 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -976,6 +976,9 @@ static int seccomp_notify_release(struct inode *inode, struct file *file) struct seccomp_filter *filter = file->private_data; struct seccomp_knotif *knotif; + if (!filter) + return 0; + mutex_lock(&filter->notify_lock); /* @@ -1300,6 +1303,7 @@ out: out_put_fd: if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { if (ret < 0) { + listener_f->private_data = NULL; fput(listener_f); put_unused_fd(listener); } else { diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 8f0644af40be..80f955210861 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -685,6 +685,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, * set up the signal and overrun bookkeeping. */ timer->it.cpu.incr = timespec64_to_ns(&new->it_interval); + timer->it_interval = ns_to_ktime(timer->it.cpu.incr); /* * This acts as a modification timestamp for the timer, diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index 14436f4ca6bd..30e0f9770f88 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -52,7 +52,7 @@ u32 int_sqrt64(u64 x) if (x <= ULONG_MAX) return int_sqrt((unsigned long) x); - m = 1ULL << (fls64(x) & ~1ULL); + m = 1ULL << ((fls64(x) - 1) & ~1ULL); while (m != 0) { b = y + m; y >>= 1; diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 4676c0a1eeca..c596a957f764 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -199,7 +199,7 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) XA_BUG_ON(xa, xa_store_index(xa, index + 1, GFP_KERNEL)); xa_set_mark(xa, index + 1, XA_MARK_0); XA_BUG_ON(xa, xa_store_index(xa, index + 2, GFP_KERNEL)); - xa_set_mark(xa, index + 2, XA_MARK_1); + xa_set_mark(xa, index + 2, XA_MARK_2); XA_BUG_ON(xa, xa_store_index(xa, next, GFP_KERNEL)); xa_store_order(xa, index, order, xa_mk_index(index), GFP_KERNEL); @@ -209,8 +209,8 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) void *entry; XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0)); - XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_1)); - XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_2)); + XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_1)); + XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_2)); /* We should see two elements in the array */ rcu_read_lock(); @@ -357,7 +357,7 @@ static noinline void check_cmpxchg(struct xarray *xa) static noinline void check_reserve(struct xarray *xa) { void *entry; - unsigned long index = 0; + unsigned long index; /* An array with a reserved entry is not empty */ XA_BUG_ON(xa, !xa_empty(xa)); @@ -382,10 +382,12 @@ static noinline void check_reserve(struct xarray *xa) xa_erase_index(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); - /* And so does xa_insert */ + /* But xa_insert does not */ xa_reserve(xa, 12345678, GFP_KERNEL); - XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) != 0); - xa_erase_index(xa, 12345678); + XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) != + -EEXIST); + XA_BUG_ON(xa, xa_empty(xa)); + XA_BUG_ON(xa, xa_erase(xa, 12345678) != NULL); XA_BUG_ON(xa, !xa_empty(xa)); /* Can iterate through a reserved entry */ @@ -393,7 +395,7 @@ static noinline void check_reserve(struct xarray *xa) xa_reserve(xa, 6, GFP_KERNEL); xa_store_index(xa, 7, GFP_KERNEL); - xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) { + xa_for_each(xa, index, entry) { XA_BUG_ON(xa, index != 5 && index != 7); } xa_destroy(xa); @@ -812,17 +814,16 @@ static noinline void check_find_1(struct xarray *xa) static noinline void check_find_2(struct xarray *xa) { void *entry; - unsigned long i, j, index = 0; + unsigned long i, j, index; - xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) { + xa_for_each(xa, index, entry) { XA_BUG_ON(xa, true); } for (i = 0; i < 1024; i++) { xa_store_index(xa, index, GFP_KERNEL); j = 0; - index = 0; - xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) { + xa_for_each(xa, index, entry) { XA_BUG_ON(xa, xa_mk_index(index) != entry); XA_BUG_ON(xa, index != j++); } @@ -839,6 +840,7 @@ static noinline void check_find_3(struct xarray *xa) for (i = 0; i < 100; i++) { for (j = 0; j < 100; j++) { + rcu_read_lock(); for (k = 0; k < 100; k++) { xas_set(&xas, j); xas_for_each_marked(&xas, entry, k, XA_MARK_0) @@ -847,6 +849,7 @@ static noinline void check_find_3(struct xarray *xa) XA_BUG_ON(xa, xas.xa_node != XAS_RESTART); } + rcu_read_unlock(); } xa_store_index(xa, i, GFP_KERNEL); xa_set_mark(xa, i, XA_MARK_0); @@ -1183,6 +1186,35 @@ static noinline void check_store_range(struct xarray *xa) } } +static void check_align_1(struct xarray *xa, char *name) +{ + int i; + unsigned int id; + unsigned long index; + void *entry; + + for (i = 0; i < 8; i++) { + id = 0; + XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, name + i, GFP_KERNEL) + != 0); + XA_BUG_ON(xa, id != i); + } + xa_for_each(xa, index, entry) + XA_BUG_ON(xa, xa_is_err(entry)); + xa_destroy(xa); +} + +static noinline void check_align(struct xarray *xa) +{ + char name[] = "Motorola 68000"; + + check_align_1(xa, name); + check_align_1(xa, name + 1); + check_align_1(xa, name + 2); + check_align_1(xa, name + 3); +// check_align_2(xa, name); +} + static LIST_HEAD(shadow_nodes); static void test_update_node(struct xa_node *node) @@ -1332,6 +1364,7 @@ static int xarray_checks(void) check_create_range(&array); check_store_range(&array); check_store_iter(&array); + check_align(&xa0); check_workingset(&array, 0); check_workingset(&array, 64); diff --git a/lib/xarray.c b/lib/xarray.c index 5f3f9311de89..81c3171ddde9 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -232,6 +232,8 @@ void *xas_load(struct xa_state *xas) if (xas->xa_shift > node->shift) break; entry = xas_descend(xas, node); + if (node->shift == 0) + break; } return entry; } @@ -506,7 +508,7 @@ static void xas_free_nodes(struct xa_state *xas, struct xa_node *top) for (;;) { void *entry = xa_entry_locked(xas->xa, node, offset); - if (xa_is_node(entry)) { + if (node->shift && xa_is_node(entry)) { node = xa_to_node(entry); offset = 0; continue; @@ -604,6 +606,7 @@ static int xas_expand(struct xa_state *xas, void *head) /* * xas_create() - Create a slot to store an entry in. * @xas: XArray operation state. + * @allow_root: %true if we can store the entry in the root directly * * Most users will not need to call this function directly, as it is called * by xas_store(). It is useful for doing conditional store operations @@ -613,7 +616,7 @@ static int xas_expand(struct xa_state *xas, void *head) * If the slot was newly created, returns %NULL. If it failed to create the * slot, returns %NULL and indicates the error in @xas. */ -static void *xas_create(struct xa_state *xas) +static void *xas_create(struct xa_state *xas, bool allow_root) { struct xarray *xa = xas->xa; void *entry; @@ -628,6 +631,8 @@ static void *xas_create(struct xa_state *xas) shift = xas_expand(xas, entry); if (shift < 0) return NULL; + if (!shift && !allow_root) + shift = XA_CHUNK_SHIFT; entry = xa_head_locked(xa); slot = &xa->xa_head; } else if (xas_error(xas)) { @@ -687,7 +692,7 @@ void xas_create_range(struct xa_state *xas) xas->xa_sibs = 0; for (;;) { - xas_create(xas); + xas_create(xas, true); if (xas_error(xas)) goto restore; if (xas->xa_index <= (index | XA_CHUNK_MASK)) @@ -754,7 +759,7 @@ void *xas_store(struct xa_state *xas, void *entry) bool value = xa_is_value(entry); if (entry) - first = xas_create(xas); + first = xas_create(xas, !xa_is_node(entry)); else first = xas_load(xas); @@ -1250,35 +1255,6 @@ void *xas_find_conflict(struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_find_conflict); -/** - * xa_init_flags() - Initialise an empty XArray with flags. - * @xa: XArray. - * @flags: XA_FLAG values. - * - * If you need to initialise an XArray with special flags (eg you need - * to take the lock from interrupt context), use this function instead - * of xa_init(). - * - * Context: Any context. - */ -void xa_init_flags(struct xarray *xa, gfp_t flags) -{ - unsigned int lock_type; - static struct lock_class_key xa_lock_irq; - static struct lock_class_key xa_lock_bh; - - spin_lock_init(&xa->xa_lock); - xa->xa_flags = flags; - xa->xa_head = NULL; - - lock_type = xa_lock_type(xa); - if (lock_type == XA_LOCK_IRQ) - lockdep_set_class(&xa->xa_lock, &xa_lock_irq); - else if (lock_type == XA_LOCK_BH) - lockdep_set_class(&xa->xa_lock, &xa_lock_bh); -} -EXPORT_SYMBOL(xa_init_flags); - /** * xa_load() - Load an entry from an XArray. * @xa: XArray. @@ -1308,7 +1284,6 @@ static void *xas_result(struct xa_state *xas, void *curr) { if (xa_is_zero(curr)) return NULL; - XA_NODE_BUG_ON(xas->xa_node, xa_is_internal(curr)); if (xas_error(xas)) curr = xas->xa_node; return curr; @@ -1378,7 +1353,7 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) XA_STATE(xas, xa, index); void *curr; - if (WARN_ON_ONCE(xa_is_internal(entry))) + if (WARN_ON_ONCE(xa_is_advanced(entry))) return XA_ERROR(-EINVAL); if (xa_track_free(xa) && !entry) entry = XA_ZERO_ENTRY; @@ -1444,7 +1419,7 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, XA_STATE(xas, xa, index); void *curr; - if (WARN_ON_ONCE(xa_is_internal(entry))) + if (WARN_ON_ONCE(xa_is_advanced(entry))) return XA_ERROR(-EINVAL); if (xa_track_free(xa) && !entry) entry = XA_ZERO_ENTRY; @@ -1464,6 +1439,47 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, } EXPORT_SYMBOL(__xa_cmpxchg); +/** + * __xa_insert() - Store this entry in the XArray if no entry is present. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * Inserting a NULL entry will store a reserved entry (like xa_reserve()) + * if no entry is present. Inserting will fail if a reserved entry is + * present, even though loading from this index will return NULL. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * -ENOMEM if memory could not be allocated. + */ +int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + void *curr; + + if (WARN_ON_ONCE(xa_is_advanced(entry))) + return -EINVAL; + if (!entry) + entry = XA_ZERO_ENTRY; + + do { + curr = xas_load(&xas); + if (!curr) { + xas_store(&xas, entry); + if (xa_track_free(xa)) + xas_clear_mark(&xas, XA_FREE_MARK); + } else { + xas_set_err(&xas, -EEXIST); + } + } while (__xas_nomem(&xas, gfp)); + + return xas_error(&xas); +} +EXPORT_SYMBOL(__xa_insert); + /** * __xa_reserve() - Reserve this index in the XArray. * @xa: XArray. @@ -1567,7 +1583,7 @@ void *xa_store_range(struct xarray *xa, unsigned long first, if (last + 1) order = __ffs(last + 1); xas_set_order(&xas, last, order); - xas_create(&xas); + xas_create(&xas, true); if (xas_error(&xas)) goto unlock; } @@ -1609,7 +1625,7 @@ int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp) XA_STATE(xas, xa, 0); int err; - if (WARN_ON_ONCE(xa_is_internal(entry))) + if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; if (WARN_ON_ONCE(!xa_track_free(xa))) return -EINVAL; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 8a8bb8796c6c..72e6d0c55cfa 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -689,6 +689,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); bdi->cgwb_congested_tree = RB_ROOT; mutex_init(&bdi->cgwb_release_mutex); + init_rwsem(&bdi->wb_switch_rwsem); ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); if (!ret) { diff --git a/mm/mincore.c b/mm/mincore.c index f0f91461a9f4..218099b5ed31 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -42,14 +42,72 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, return 0; } +/* + * Later we can get more picky about what "in core" means precisely. + * For now, simply check to see if the page is in the page cache, + * and is up to date; i.e. that no page-in operation would be required + * at this time if an application were to map and access this page. + */ +static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) +{ + unsigned char present = 0; + struct page *page; + + /* + * When tmpfs swaps out a page from a file, any process mapping that + * file will not get a swp_entry_t in its pte, but rather it is like + * any other file mapping (ie. marked !present and faulted in with + * tmpfs's .fault). So swapped out tmpfs mappings are tested here. + */ +#ifdef CONFIG_SWAP + if (shmem_mapping(mapping)) { + page = find_get_entry(mapping, pgoff); + /* + * shmem/tmpfs may return swap: account for swapcache + * page too. + */ + if (xa_is_value(page)) { + swp_entry_t swp = radix_to_swp_entry(page); + page = find_get_page(swap_address_space(swp), + swp_offset(swp)); + } + } else + page = find_get_page(mapping, pgoff); +#else + page = find_get_page(mapping, pgoff); +#endif + if (page) { + present = PageUptodate(page); + put_page(page); + } + + return present; +} + +static int __mincore_unmapped_range(unsigned long addr, unsigned long end, + struct vm_area_struct *vma, unsigned char *vec) +{ + unsigned long nr = (end - addr) >> PAGE_SHIFT; + int i; + + if (vma->vm_file) { + pgoff_t pgoff; + + pgoff = linear_page_index(vma, addr); + for (i = 0; i < nr; i++, pgoff++) + vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff); + } else { + for (i = 0; i < nr; i++) + vec[i] = 0; + } + return nr; +} + static int mincore_unmapped_range(unsigned long addr, unsigned long end, struct mm_walk *walk) { - unsigned char *vec = walk->private; - unsigned long nr = (end - addr) >> PAGE_SHIFT; - - memset(vec, 0, nr); - walk->private += nr; + walk->private += __mincore_unmapped_range(addr, end, + walk->vma, walk->private); return 0; } @@ -69,9 +127,8 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, goto out; } - /* We'll consider a THP page under construction to be there */ if (pmd_trans_unstable(pmd)) { - memset(vec, 1, nr); + __mincore_unmapped_range(addr, end, vma, vec); goto out; } @@ -80,17 +137,28 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t pte = *ptep; if (pte_none(pte)) - *vec = 0; + __mincore_unmapped_range(addr, addr + PAGE_SIZE, + vma, vec); else if (pte_present(pte)) *vec = 1; else { /* pte is a swap entry */ swp_entry_t entry = pte_to_swp_entry(pte); - /* - * migration or hwpoison entries are always - * uptodate - */ - *vec = !!non_swap_entry(entry); + if (non_swap_entry(entry)) { + /* + * migration or hwpoison entries are always + * uptodate + */ + *vec = 1; + } else { +#ifdef CONFIG_SWAP + *vec = mincore_page(swap_address_space(entry), + swp_offset(entry)); +#else + WARN_ON(1); + *vec = 1; +#endif + } } vec++; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d295c9bc01a8..35fdde041f5c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5701,18 +5701,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, cond_resched(); } } -#ifdef CONFIG_SPARSEMEM - /* - * If the zone does not span the rest of the section then - * we should at least initialize those pages. Otherwise we - * could blow up on a poisoned page in some paths which depend - * on full sections being initialized (e.g. memory hotplug). - */ - while (end_pfn % PAGES_PER_SECTION) { - __init_single_page(pfn_to_page(end_pfn), end_pfn, zone, nid); - end_pfn++; - } -#endif } #ifdef CONFIG_ZONE_DEVICE diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index 70417e9b932d..314bbc8010fb 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -114,6 +114,7 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) dst = (ax25_address *)(bp + 1); src = (ax25_address *)(bp + 8); + ax25_route_lock_use(); route = ax25_get_route(dst, NULL); if (route) { digipeat = route->digipeat; @@ -206,9 +207,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) ax25_queue_xmit(skb, dev); put: - if (route) - ax25_put_route(route); + ax25_route_lock_unuse(); return NETDEV_TX_OK; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index a0eff323af12..66f74c85cf6b 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -40,7 +40,7 @@ #include static ax25_route *ax25_route_list; -static DEFINE_RWLOCK(ax25_route_lock); +DEFINE_RWLOCK(ax25_route_lock); void ax25_rt_device_down(struct net_device *dev) { @@ -335,6 +335,7 @@ const struct seq_operations ax25_rt_seqops = { * Find AX.25 route * * Only routes with a reference count of zero can be destroyed. + * Must be called with ax25_route_lock read locked. */ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) { @@ -342,7 +343,6 @@ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) ax25_route *ax25_def_rt = NULL; ax25_route *ax25_rt; - read_lock(&ax25_route_lock); /* * Bind to the physical interface we heard them on, or the default * route if none is found; @@ -365,11 +365,6 @@ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) if (ax25_spe_rt != NULL) ax25_rt = ax25_spe_rt; - if (ax25_rt != NULL) - ax25_hold_route(ax25_rt); - - read_unlock(&ax25_route_lock); - return ax25_rt; } @@ -400,9 +395,12 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) ax25_route *ax25_rt; int err = 0; - if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL) + ax25_route_lock_use(); + ax25_rt = ax25_get_route(addr, NULL); + if (!ax25_rt) { + ax25_route_lock_unuse(); return -EHOSTUNREACH; - + } if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) { err = -EHOSTUNREACH; goto put; @@ -437,8 +435,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) } put: - ax25_put_route(ax25_rt); - + ax25_route_lock_unuse(); return err; } diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 69244e4598f5..1dd70f048e7b 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -674,7 +674,7 @@ static void batadv_mcast_mla_update(struct work_struct *work) */ static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb) { - if (ip_mc_check_igmp(skb, NULL) < 0) + if (ip_mc_check_igmp(skb) < 0) return false; switch (igmp_hdr(skb)->type) { @@ -741,7 +741,7 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, */ static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb) { - if (ipv6_mc_check_mld(skb, NULL) < 0) + if (ipv6_mc_check_mld(skb) < 0) return false; switch (icmp6_hdr(skb)->icmp6_type) { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index fa2644d276ef..2c5172b33209 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -240,3 +240,85 @@ out: kfree(data); return ret; } + +int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + struct bpf_flow_keys flow_keys; + u64 time_start, time_spent = 0; + struct bpf_skb_data_end *cb; + u32 retval, duration; + struct sk_buff *skb; + struct sock *sk; + void *data; + int ret; + u32 i; + + if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) + return -EINVAL; + + data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (IS_ERR(data)) + return PTR_ERR(data); + + sk = kzalloc(sizeof(*sk), GFP_USER); + if (!sk) { + kfree(data); + return -ENOMEM; + } + sock_net_set(sk, current->nsproxy->net_ns); + sock_init_data(NULL, sk); + + skb = build_skb(data, 0); + if (!skb) { + kfree(data); + kfree(sk); + return -ENOMEM; + } + skb->sk = sk; + + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); + __skb_put(skb, size); + skb->protocol = eth_type_trans(skb, + current->nsproxy->net_ns->loopback_dev); + skb_reset_network_header(skb); + + cb = (struct bpf_skb_data_end *)skb->cb; + cb->qdisc_cb.flow_keys = &flow_keys; + + if (!repeat) + repeat = 1; + + time_start = ktime_get_ns(); + for (i = 0; i < repeat; i++) { + preempt_disable(); + rcu_read_lock(); + retval = __skb_flow_bpf_dissect(prog, skb, + &flow_keys_dissector, + &flow_keys); + rcu_read_unlock(); + preempt_enable(); + + if (need_resched()) { + if (signal_pending(current)) + break; + time_spent += ktime_get_ns() - time_start; + cond_resched(); + time_start = ktime_get_ns(); + } + } + time_spent += ktime_get_ns() - time_start; + do_div(time_spent, repeat); + duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; + + ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), + retval, duration); + + kfree_skb(skb); + kfree(sk); + return ret; +} diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S index 7f1c521dcc2f..9ea6100dca87 100644 --- a/net/bpfilter/bpfilter_umh_blob.S +++ b/net/bpfilter/bpfilter_umh_blob.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ - .section .bpfilter_umh, "a" + .section .rodata, "a" .global bpfilter_umh_start bpfilter_umh_start: .incbin "net/bpfilter/bpfilter_umh" diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 6664cb8590f8..00573cc46c98 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1129,6 +1129,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, err = -ENOMEM; goto err_unlock; } + if (swdev_notify) + fdb->added_by_user = 1; fdb->added_by_external_learn = 1; fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } else { @@ -1148,6 +1150,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, modified = true; } + if (swdev_notify) + fdb->added_by_user = 1; + if (modified) fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 2cb8da465b98..48ddc60b4fbd 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -36,10 +36,10 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { + skb_push(skb, ETH_HLEN); if (!is_skb_forwardable(skb->dev, skb)) goto drop; - skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && @@ -98,12 +98,11 @@ static void __br_forward(const struct net_bridge_port *to, net = dev_net(indev); } else { if (unlikely(netpoll_tx_running(to->br->dev))) { - if (!is_skb_forwardable(skb->dev, skb)) { + skb_push(skb, ETH_HLEN); + if (!is_skb_forwardable(skb->dev, skb)) kfree_skb(skb); - } else { - skb_push(skb, ETH_HLEN); + else br_netpoll_send_skb(to, skb); - } return; } br_hook = NF_BR_LOCAL_OUT; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 3aeff0895669..780757b7a82f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #if IS_ENABLED(CONFIG_IPV6) +#include #include #include #include @@ -938,7 +940,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, for (i = 0; i < num; i++) { len += sizeof(*grec); - if (!pskb_may_pull(skb, len)) + if (!ip_mc_may_pull(skb, len)) return -EINVAL; grec = (void *)(skb->data + len - sizeof(*grec)); @@ -946,7 +948,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, type = grec->grec_type; len += ntohs(grec->grec_nsrcs) * 4; - if (!pskb_may_pull(skb, len)) + if (!ip_mc_may_pull(skb, len)) return -EINVAL; /* We treat this as an IGMPv2 report for now. */ @@ -985,15 +987,17 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, struct sk_buff *skb, u16 vid) { + unsigned int nsrcs_offset; const unsigned char *src; struct icmp6hdr *icmp6h; struct mld2_grec *grec; + unsigned int grec_len; int i; int len; int num; int err = 0; - if (!pskb_may_pull(skb, sizeof(*icmp6h))) + if (!ipv6_mc_may_pull(skb, sizeof(*icmp6h))) return -EINVAL; icmp6h = icmp6_hdr(skb); @@ -1003,21 +1007,25 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, for (i = 0; i < num; i++) { __be16 *nsrcs, _nsrcs; - nsrcs = skb_header_pointer(skb, - len + offsetof(struct mld2_grec, - grec_nsrcs), + nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs); + + if (skb_transport_offset(skb) + ipv6_transport_len(skb) < + nsrcs_offset + sizeof(_nsrcs)) + return -EINVAL; + + nsrcs = skb_header_pointer(skb, nsrcs_offset, sizeof(_nsrcs), &_nsrcs); if (!nsrcs) return -EINVAL; - if (!pskb_may_pull(skb, - len + sizeof(*grec) + - sizeof(struct in6_addr) * ntohs(*nsrcs))) + grec_len = sizeof(*grec) + + sizeof(struct in6_addr) * ntohs(*nsrcs); + + if (!ipv6_mc_may_pull(skb, len + grec_len)) return -EINVAL; grec = (struct mld2_grec *)(skb->data + len); - len += sizeof(*grec) + - sizeof(struct in6_addr) * ntohs(*nsrcs); + len += grec_len; /* We treat these as MLDv1 reports for now. */ switch (grec->grec_type) { @@ -1219,6 +1227,7 @@ static void br_ip4_multicast_query(struct net_bridge *br, struct sk_buff *skb, u16 vid) { + unsigned int transport_len = ip_transport_len(skb); const struct iphdr *iph = ip_hdr(skb); struct igmphdr *ih = igmp_hdr(skb); struct net_bridge_mdb_entry *mp; @@ -1228,7 +1237,6 @@ static void br_ip4_multicast_query(struct net_bridge *br, struct br_ip saddr; unsigned long max_delay; unsigned long now = jiffies; - unsigned int offset = skb_transport_offset(skb); __be32 group; spin_lock(&br->multicast_lock); @@ -1238,14 +1246,14 @@ static void br_ip4_multicast_query(struct net_bridge *br, group = ih->group; - if (skb->len == offset + sizeof(*ih)) { + if (transport_len == sizeof(*ih)) { max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); if (!max_delay) { max_delay = 10 * HZ; group = 0; } - } else if (skb->len >= offset + sizeof(*ih3)) { + } else if (transport_len >= sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); if (ih3->nsrcs) goto out; @@ -1296,6 +1304,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct sk_buff *skb, u16 vid) { + unsigned int transport_len = ipv6_transport_len(skb); const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct mld_msg *mld; struct net_bridge_mdb_entry *mp; @@ -1315,7 +1324,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - if (skb->len == offset + sizeof(*mld)) { + if (transport_len == sizeof(*mld)) { if (!pskb_may_pull(skb, offset + sizeof(*mld))) { err = -EINVAL; goto out; @@ -1576,17 +1585,29 @@ static void br_multicast_pim(struct net_bridge *br, br_multicast_mark_router(br, port); } +static int br_ip4_multicast_mrd_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + if (ip_hdr(skb)->protocol != IPPROTO_IGMP || + igmp_hdr(skb)->type != IGMP_MRDISC_ADV) + return -ENOMSG; + + br_multicast_mark_router(br, port); + + return 0; +} + static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, u16 vid) { - struct sk_buff *skb_trimmed = NULL; const unsigned char *src; struct igmphdr *ih; int err; - err = ip_mc_check_igmp(skb, &skb_trimmed); + err = ip_mc_check_igmp(skb); if (err == -ENOMSG) { if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) { @@ -1594,7 +1615,15 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) { if (ip_hdr(skb)->protocol == IPPROTO_PIM) br_multicast_pim(br, port, skb); + } else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) { + err = br_ip4_multicast_mrd_rcv(br, port, skb); + + if (err < 0 && err != -ENOMSG) { + br_multicast_err_count(br, port, skb->protocol); + return err; + } } + return 0; } else if (err < 0) { br_multicast_err_count(br, port, skb->protocol); @@ -1612,19 +1641,16 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_add_group(br, port, ih->group, vid, src); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid); + err = br_ip4_multicast_igmp3_report(br, port, skb, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - br_ip4_multicast_query(br, port, skb_trimmed, vid); + br_ip4_multicast_query(br, port, skb, vid); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(br, port, ih->group, vid, src); break; } - if (skb_trimmed && skb_trimmed != skb) - kfree_skb(skb_trimmed); - br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp, BR_MCAST_DIR_RX); @@ -1632,21 +1658,51 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, } #if IS_ENABLED(CONFIG_IPV6) +static int br_ip6_multicast_mrd_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb) +{ + int ret; + + if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) + return -ENOMSG; + + ret = ipv6_mc_check_icmpv6(skb); + if (ret < 0) + return ret; + + if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV) + return -ENOMSG; + + br_multicast_mark_router(br, port); + + return 0; +} + static int br_multicast_ipv6_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, u16 vid) { - struct sk_buff *skb_trimmed = NULL; const unsigned char *src; struct mld_msg *mld; int err; - err = ipv6_mc_check_mld(skb, &skb_trimmed); + err = ipv6_mc_check_mld(skb); if (err == -ENOMSG) { if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + + if (ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr)) { + err = br_ip6_multicast_mrd_rcv(br, port, skb); + + if (err < 0 && err != -ENOMSG) { + br_multicast_err_count(br, port, skb->protocol); + return err; + } + } + return 0; } else if (err < 0) { br_multicast_err_count(br, port, skb->protocol); @@ -1664,10 +1720,10 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, src); break; case ICMPV6_MLD2_REPORT: - err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid); + err = br_ip6_multicast_mld2_report(br, port, skb, vid); break; case ICMPV6_MGM_QUERY: - err = br_ip6_multicast_query(br, port, skb_trimmed, vid); + err = br_ip6_multicast_query(br, port, skb, vid); break; case ICMPV6_MGM_REDUCTION: src = eth_hdr(skb)->h_source; @@ -1675,9 +1731,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, break; } - if (skb_trimmed && skb_trimmed != skb) - kfree_skb(skb_trimmed); - br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp, BR_MCAST_DIR_RX); @@ -1781,6 +1834,68 @@ void br_multicast_init(struct net_bridge *br) INIT_HLIST_HEAD(&br->mdb_list); } +static void br_ip4_multicast_join_snoopers(struct net_bridge *br) +{ + struct in_device *in_dev = in_dev_get(br->dev); + + if (!in_dev) + return; + + ip_mc_inc_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP)); + in_dev_put(in_dev); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_join_snoopers(struct net_bridge *br) +{ + struct in6_addr addr; + + ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a)); + ipv6_dev_mc_inc(br->dev, &addr); +} +#else +static inline void br_ip6_multicast_join_snoopers(struct net_bridge *br) +{ +} +#endif + +static void br_multicast_join_snoopers(struct net_bridge *br) +{ + br_ip4_multicast_join_snoopers(br); + br_ip6_multicast_join_snoopers(br); +} + +static void br_ip4_multicast_leave_snoopers(struct net_bridge *br) +{ + struct in_device *in_dev = in_dev_get(br->dev); + + if (WARN_ON(!in_dev)) + return; + + ip_mc_dec_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP)); + in_dev_put(in_dev); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_leave_snoopers(struct net_bridge *br) +{ + struct in6_addr addr; + + ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a)); + ipv6_dev_mc_dec(br->dev, &addr); +} +#else +static inline void br_ip6_multicast_leave_snoopers(struct net_bridge *br) +{ +} +#endif + +static void br_multicast_leave_snoopers(struct net_bridge *br) +{ + br_ip4_multicast_leave_snoopers(br); + br_ip6_multicast_leave_snoopers(br); +} + static void __br_multicast_open(struct net_bridge *br, struct bridge_mcast_own_query *query) { @@ -1794,6 +1909,9 @@ static void __br_multicast_open(struct net_bridge *br, void br_multicast_open(struct net_bridge *br) { + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_join_snoopers(br); + __br_multicast_open(br, &br->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) __br_multicast_open(br, &br->ip6_own_query); @@ -1809,6 +1927,9 @@ void br_multicast_stop(struct net_bridge *br) del_timer_sync(&br->ip6_other_query.timer); del_timer_sync(&br->ip6_own_query.timer); #endif + + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_leave_snoopers(br); } void br_multicast_dev_del(struct net_bridge *br) @@ -1944,8 +2065,10 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) br_mc_disabled_update(br->dev, val); br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val); - if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) + if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { + br_multicast_leave_snoopers(br); goto unlock; + } if (!netif_running(br->dev)) goto unlock; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index c93c35bb73dd..40d058378b52 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -881,11 +881,6 @@ static const struct nf_br_ops br_ops = { .br_dev_xmit_hook = br_nf_dev_xmit, }; -void br_netfilter_enable(void) -{ -} -EXPORT_SYMBOL_GPL(br_netfilter_enable); - /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because * br_dev_queue_push_xmit is called afterwards */ static const struct nf_hook_ops br_nf_ops[] = { diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 94039f588f1d..564710f88f93 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -131,6 +131,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) IPSTATS_MIB_INDISCARDS); goto drop; } + hdr = ipv6_hdr(skb); } if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) goto drop; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 491828713e0b..6693e209efe8 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1137,14 +1137,16 @@ static int do_replace(struct net *net, const void __user *user, tmp.name[sizeof(tmp.name) - 1] = 0; countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; - newinfo = vmalloc(sizeof(*newinfo) + countersize); + newinfo = __vmalloc(sizeof(*newinfo) + countersize, GFP_KERNEL_ACCOUNT, + PAGE_KERNEL); if (!newinfo) return -ENOMEM; if (countersize) memset(newinfo->counters, 0, countersize); - newinfo->entries = vmalloc(tmp.entries_size); + newinfo->entries = __vmalloc(tmp.entries_size, GFP_KERNEL_ACCOUNT, + PAGE_KERNEL); if (!newinfo->entries) { ret = -ENOMEM; goto free_newinfo; @@ -2291,9 +2293,12 @@ static int compat_do_replace(struct net *net, void __user *user, xt_compat_lock(NFPROTO_BRIDGE); - ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); - if (ret < 0) - goto out_unlock; + if (tmp.nentries) { + ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); + if (ret < 0) + goto out_unlock; + } + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (ret < 0) goto out_unlock; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 08cbed7d940e..419e8edf23ba 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -229,6 +229,7 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) return false; + ip6h = ipv6_hdr(skb); thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; diff --git a/net/can/bcm.c b/net/can/bcm.c index 0af8f0db892a..79bb8afa9c0c 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -67,6 +67,9 @@ */ #define MAX_NFRAMES 256 +/* limit timers to 400 days for sending/timeouts */ +#define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60) + /* use of last_frames[index].flags */ #define RX_RECV 0x40 /* received data for this element */ #define RX_THR 0x80 /* element not been sent due to throttle feature */ @@ -140,6 +143,22 @@ static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv) return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); } +/* check limitations for timeval provided by user */ +static bool bcm_is_invalid_tv(struct bcm_msg_head *msg_head) +{ + if ((msg_head->ival1.tv_sec < 0) || + (msg_head->ival1.tv_sec > BCM_TIMER_SEC_MAX) || + (msg_head->ival1.tv_usec < 0) || + (msg_head->ival1.tv_usec >= USEC_PER_SEC) || + (msg_head->ival2.tv_sec < 0) || + (msg_head->ival2.tv_sec > BCM_TIMER_SEC_MAX) || + (msg_head->ival2.tv_usec < 0) || + (msg_head->ival2.tv_usec >= USEC_PER_SEC)) + return true; + + return false; +} + #define CFSIZ(flags) ((flags & CAN_FD_FRAME) ? CANFD_MTU : CAN_MTU) #define OPSIZ sizeof(struct bcm_op) #define MHSIZ sizeof(struct bcm_msg_head) @@ -873,6 +892,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, if (msg_head->nframes < 1 || msg_head->nframes > MAX_NFRAMES) return -EINVAL; + /* check timeval limitations */ + if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) + return -EINVAL; + /* check the given can_id */ op = bcm_find_op(&bo->tx_ops, msg_head, ifindex); if (op) { @@ -1053,6 +1076,10 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, (!(msg_head->can_id & CAN_RTR_FLAG)))) return -EINVAL; + /* check timeval limitations */ + if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) + return -EINVAL; + /* check the given can_id */ op = bcm_find_op(&bo->rx_ops, msg_head, ifindex); if (op) { diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d5718284db57..3661cdd927f1 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3206,9 +3206,10 @@ void ceph_con_keepalive(struct ceph_connection *con) dout("con_keepalive %p\n", con); mutex_lock(&con->mutex); clear_standby(con); + con_flag_set(con, CON_FLAG_KEEPALIVE_PENDING); mutex_unlock(&con->mutex); - if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 && - con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) + + if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_keepalive); diff --git a/net/core/dev.c b/net/core/dev.c index 82f20022259d..8e276e0192a1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8712,6 +8712,9 @@ int init_dummy_netdev(struct net_device *dev) set_bit(__LINK_STATE_PRESENT, &dev->state); set_bit(__LINK_STATE_START, &dev->state); + /* napi_busy_loop stats accounting wants this */ + dev_net_set(dev, &init_net); + /* Note : We dont allocate pcpu_refcnt for dummy devices, * because users of this 'device' dont need to change * its refcount. diff --git a/net/core/devlink.c b/net/core/devlink.c index 60248a53c0ad..e6f170caf449 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2697,6 +2697,11 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME, .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_WOL, + .name = DEVLINK_PARAM_GENERIC_WOL_NAME, + .type = DEVLINK_PARAM_GENERIC_WOL_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) @@ -2843,6 +2848,7 @@ nla_put_failure: } static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, + unsigned int port_index, struct devlink_param_item *param_item, enum devlink_command cmd, u32 portid, u32 seq, int flags) @@ -2880,6 +2886,13 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, if (devlink_nl_put_handle(msg, devlink)) goto genlmsg_cancel; + + if (cmd == DEVLINK_CMD_PORT_PARAM_GET || + cmd == DEVLINK_CMD_PORT_PARAM_NEW || + cmd == DEVLINK_CMD_PORT_PARAM_DEL) + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index)) + goto genlmsg_cancel; + param_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM); if (!param_attr) goto genlmsg_cancel; @@ -2922,18 +2935,22 @@ genlmsg_cancel: } static void devlink_param_notify(struct devlink *devlink, + unsigned int port_index, struct devlink_param_item *param_item, enum devlink_command cmd) { struct sk_buff *msg; int err; - WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL); + WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL && + cmd != DEVLINK_CMD_PORT_PARAM_NEW && + cmd != DEVLINK_CMD_PORT_PARAM_DEL); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - err = devlink_nl_param_fill(msg, devlink, param_item, cmd, 0, 0, 0); + err = devlink_nl_param_fill(msg, devlink, port_index, param_item, cmd, + 0, 0, 0); if (err) { nlmsg_free(msg); return; @@ -2962,7 +2979,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, idx++; continue; } - err = devlink_nl_param_fill(msg, devlink, param_item, + err = devlink_nl_param_fill(msg, devlink, 0, param_item, DEVLINK_CMD_PARAM_GET, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -3051,7 +3068,7 @@ devlink_param_value_get_from_info(const struct devlink_param *param, } static struct devlink_param_item * -devlink_param_get_from_info(struct devlink *devlink, +devlink_param_get_from_info(struct list_head *param_list, struct genl_info *info) { char *param_name; @@ -3060,7 +3077,7 @@ devlink_param_get_from_info(struct devlink *devlink, return NULL; param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]); - return devlink_param_find_by_name(&devlink->param_list, param_name); + return devlink_param_find_by_name(param_list, param_name); } static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb, @@ -3071,7 +3088,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb, struct sk_buff *msg; int err; - param_item = devlink_param_get_from_info(devlink, info); + param_item = devlink_param_get_from_info(&devlink->param_list, info); if (!param_item) return -EINVAL; @@ -3079,7 +3096,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb, if (!msg) return -ENOMEM; - err = devlink_nl_param_fill(msg, devlink, param_item, + err = devlink_nl_param_fill(msg, devlink, 0, param_item, DEVLINK_CMD_PARAM_GET, info->snd_portid, info->snd_seq, 0); if (err) { @@ -3090,10 +3107,12 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } -static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, - struct genl_info *info) +static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, + struct genl_info *info, + enum devlink_command cmd) { - struct devlink *devlink = info->user_ptr[0]; enum devlink_param_type param_type; struct devlink_param_gset_ctx ctx; enum devlink_param_cmode cmode; @@ -3102,7 +3121,7 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, union devlink_param_value value; int err = 0; - param_item = devlink_param_get_from_info(devlink, info); + param_item = devlink_param_get_from_info(param_list, info); if (!param_item) return -EINVAL; param = param_item->param; @@ -3142,17 +3161,28 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, return err; } - devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW); + devlink_param_notify(devlink, port_index, param_item, cmd); return 0; } +static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + + return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->param_list, + info, DEVLINK_CMD_PARAM_NEW); +} + static int devlink_param_register_one(struct devlink *devlink, - const struct devlink_param *param) + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *param, + enum devlink_command cmd) { struct devlink_param_item *param_item; - if (devlink_param_find_by_name(&devlink->param_list, - param->name)) + if (devlink_param_find_by_name(param_list, param->name)) return -EEXIST; if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT)) @@ -3165,24 +3195,111 @@ static int devlink_param_register_one(struct devlink *devlink, return -ENOMEM; param_item->param = param; - list_add_tail(¶m_item->list, &devlink->param_list); - devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW); + list_add_tail(¶m_item->list, param_list); + devlink_param_notify(devlink, port_index, param_item, cmd); return 0; } static void devlink_param_unregister_one(struct devlink *devlink, - const struct devlink_param *param) + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *param, + enum devlink_command cmd) { struct devlink_param_item *param_item; - param_item = devlink_param_find_by_name(&devlink->param_list, - param->name); + param_item = devlink_param_find_by_name(param_list, param->name); WARN_ON(!param_item); - devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_DEL); + devlink_param_notify(devlink, port_index, param_item, cmd); list_del(¶m_item->list); kfree(param_item); } +static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_param_item *param_item; + struct devlink_port *devlink_port; + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + mutex_lock(&devlink->lock); + list_for_each_entry(devlink_port, &devlink->port_list, list) { + list_for_each_entry(param_item, + &devlink_port->param_list, list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_param_fill(msg, + devlink_port->devlink, + devlink_port->index, param_item, + DEVLINK_CMD_PORT_PARAM_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&devlink->lock); + goto out; + } + idx++; + } + } + mutex_unlock(&devlink->lock); + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_param_item *param_item; + struct sk_buff *msg; + int err; + + param_item = devlink_param_get_from_info(&devlink_port->param_list, + info); + if (!param_item) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_param_fill(msg, devlink_port->devlink, + devlink_port->index, param_item, + DEVLINK_CMD_PORT_PARAM_GET, + info->snd_portid, info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + + return __devlink_nl_cmd_param_set_doit(devlink_port->devlink, + devlink_port->index, + &devlink_port->param_list, info, + DEVLINK_CMD_PORT_PARAM_NEW); +} + static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg, struct devlink *devlink, struct devlink_snapshot *snapshot) @@ -3597,1012 +3714,6 @@ out: return 0; } -#define DEVLINK_HEALTH_BUFFER_SIZE (4096 - GENL_HDRLEN) -#define DEVLINK_HEALTH_BUFFER_DATA_SIZE (DEVLINK_HEALTH_BUFFER_SIZE / 2) -#define DEVLINK_HEALTH_SIZE_TO_BUFFERS(size) DIV_ROUND_UP(size, DEVLINK_HEALTH_BUFFER_DATA_SIZE) -#define DEVLINK_HEALTH_BUFFER_MAX_CHUNK 1024 - -struct devlink_health_buffer { - void *data; - u64 offset; - u64 bytes_left; - u64 bytes_left_metadata; - u64 max_nested_depth; - u64 curr_nest; -}; - -struct devlink_health_buffer_desc { - int attrtype; - u16 len; - u8 nla_type; - u8 nest_end; - int value[0]; -}; - -static void -devlink_health_buffers_reset(struct devlink_health_buffer **buffers_list, - u64 num_of_buffers) -{ - u64 i; - - for (i = 0; i < num_of_buffers; i++) { - memset(buffers_list[i]->data, 0, DEVLINK_HEALTH_BUFFER_SIZE); - buffers_list[i]->offset = 0; - buffers_list[i]->bytes_left = DEVLINK_HEALTH_BUFFER_DATA_SIZE; - buffers_list[i]->bytes_left_metadata = - DEVLINK_HEALTH_BUFFER_DATA_SIZE; - buffers_list[i]->max_nested_depth = 0; - buffers_list[i]->curr_nest = 0; - } -} - -static void -devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list, - u64 size); - -static struct devlink_health_buffer ** -devlink_health_buffers_create(u64 size) -{ - struct devlink_health_buffer **buffers_list; - u64 num_of_buffers = DEVLINK_HEALTH_SIZE_TO_BUFFERS(size); - u64 i; - - buffers_list = kcalloc(num_of_buffers, - sizeof(struct devlink_health_buffer *), - GFP_KERNEL); - if (!buffers_list) - return NULL; - - for (i = 0; i < num_of_buffers; i++) { - struct devlink_health_buffer *buffer; - void *data; - - buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); - data = kzalloc(DEVLINK_HEALTH_BUFFER_SIZE, GFP_KERNEL); - if (!buffer || !data) { - kfree(buffer); - kfree(data); - goto buffers_cleanup; - } - buffers_list[i] = buffer; - buffer->data = data; - } - devlink_health_buffers_reset(buffers_list, num_of_buffers); - - return buffers_list; - -buffers_cleanup: - devlink_health_buffers_destroy(buffers_list, --i); - kfree(buffers_list); - return NULL; -} - -static void -devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list, - u64 num_of_buffers) -{ - u64 i; - - for (i = 0; i < num_of_buffers; i++) { - kfree(buffers_list[i]->data); - kfree(buffers_list[i]); - } -} - -void -devlink_health_buffer_offset_inc(struct devlink_health_buffer *buffer, - int len) -{ - buffer->offset += len; -} - -/* In order to store a nest, need two descriptors, for start and end */ -#define DEVLINK_HEALTH_BUFFER_NEST_SIZE (sizeof(struct devlink_health_buffer_desc) * 2) - -int devlink_health_buffer_verify_len(struct devlink_health_buffer *buffer, - int len, int metadata_len) -{ - if (len > DEVLINK_HEALTH_BUFFER_DATA_SIZE) - return -EINVAL; - - if (buffer->bytes_left < len || - buffer->bytes_left_metadata < metadata_len) - return -ENOMEM; - - return 0; -} - -static struct devlink_health_buffer_desc * -devlink_health_buffer_get_desc_from_offset(struct devlink_health_buffer *buffer) -{ - return buffer->data + buffer->offset; -} - -int -devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer, - int attrtype) -{ - struct devlink_health_buffer_desc *desc; - int err; - - err = devlink_health_buffer_verify_len(buffer, 0, - DEVLINK_HEALTH_BUFFER_NEST_SIZE); - if (err) - return err; - - if (attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT && - attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR && - attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE && - attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY) - return -EINVAL; - - desc = devlink_health_buffer_get_desc_from_offset(buffer); - - desc->attrtype = attrtype; - buffer->bytes_left_metadata -= DEVLINK_HEALTH_BUFFER_NEST_SIZE; - devlink_health_buffer_offset_inc(buffer, sizeof(*desc)); - - buffer->curr_nest++; - buffer->max_nested_depth = max(buffer->max_nested_depth, - buffer->curr_nest); - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_start); - -enum devlink_health_buffer_nest_end_cancel { - DEVLINK_HEALTH_BUFFER_NEST_END = 1, - DEVLINK_HEALTH_BUFFER_NEST_CANCEL, -}; - -static void -devlink_health_buffer_nest_end_cancel(struct devlink_health_buffer *buffer, - enum devlink_health_buffer_nest_end_cancel nest) -{ - struct devlink_health_buffer_desc *desc; - - WARN_ON(!buffer->curr_nest); - buffer->curr_nest--; - - desc = devlink_health_buffer_get_desc_from_offset(buffer); - desc->nest_end = nest; - devlink_health_buffer_offset_inc(buffer, sizeof(*desc)); -} - -void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer) -{ - devlink_health_buffer_nest_end_cancel(buffer, - DEVLINK_HEALTH_BUFFER_NEST_END); -} -EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_end); - -void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer) -{ - devlink_health_buffer_nest_end_cancel(buffer, - DEVLINK_HEALTH_BUFFER_NEST_CANCEL); -} -EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_cancel); - -int -devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer, - char *name) -{ - struct devlink_health_buffer_desc *desc; - int err; - - err = devlink_health_buffer_verify_len(buffer, strlen(name) + 1, - sizeof(*desc)); - if (err) - return err; - - desc = devlink_health_buffer_get_desc_from_offset(buffer); - desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME; - desc->nla_type = NLA_NUL_STRING; - desc->len = strlen(name) + 1; - memcpy(&desc->value, name, desc->len); - devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len); - - buffer->bytes_left_metadata -= sizeof(*desc); - buffer->bytes_left -= (strlen(name) + 1); - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_health_buffer_put_object_name); - -static int -devlink_health_buffer_put_value(struct devlink_health_buffer *buffer, - u8 nla_type, void *value, int len) -{ - struct devlink_health_buffer_desc *desc; - int err; - - err = devlink_health_buffer_verify_len(buffer, len, sizeof(*desc)); - if (err) - return err; - - desc = devlink_health_buffer_get_desc_from_offset(buffer); - desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA; - desc->nla_type = nla_type; - desc->len = len; - memcpy(&desc->value, value, len); - devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len); - - buffer->bytes_left_metadata -= sizeof(*desc); - buffer->bytes_left -= len; - - return 0; -} - -int -devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer, - u8 value) -{ - int err; - - err = devlink_health_buffer_put_value(buffer, NLA_U8, &value, - sizeof(value)); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u8); - -int -devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer, - u32 value) -{ - int err; - - err = devlink_health_buffer_put_value(buffer, NLA_U32, &value, - sizeof(value)); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u32); - -int -devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer, - u64 value) -{ - int err; - - err = devlink_health_buffer_put_value(buffer, NLA_U64, &value, - sizeof(value)); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u64); - -int -devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer, - char *name) -{ - int err; - - if (strlen(name) + 1 > DEVLINK_HEALTH_BUFFER_MAX_CHUNK) - return -EINVAL; - - err = devlink_health_buffer_put_value(buffer, NLA_NUL_STRING, name, - strlen(name) + 1); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_string); - -int -devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer, - void *data, int len) -{ - int err; - - if (len > DEVLINK_HEALTH_BUFFER_MAX_CHUNK) - return -EINVAL; - - err = devlink_health_buffer_put_value(buffer, NLA_BINARY, data, len); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_data); - -static int -devlink_health_buffer_fill_data(struct sk_buff *skb, - struct devlink_health_buffer_desc *desc) -{ - int err = -EINVAL; - - switch (desc->nla_type) { - case NLA_U8: - err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, - *(u8 *)desc->value); - break; - case NLA_U32: - err = nla_put_u32(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, - *(u32 *)desc->value); - break; - case NLA_U64: - err = nla_put_u64_64bit(skb, - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, - *(u64 *)desc->value, DEVLINK_ATTR_PAD); - break; - case NLA_NUL_STRING: - err = nla_put_string(skb, - DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, - (char *)&desc->value); - break; - case NLA_BINARY: - err = nla_put(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA, - desc->len, (void *)&desc->value); - break; - } - - return err; -} - -static int -devlink_health_buffer_fill_type(struct sk_buff *skb, - struct devlink_health_buffer_desc *desc) -{ - int err = -EINVAL; - - switch (desc->nla_type) { - case NLA_U8: - err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, - NLA_U8); - break; - case NLA_U32: - err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, - NLA_U32); - break; - case NLA_U64: - err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, - NLA_U64); - break; - case NLA_NUL_STRING: - err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, - NLA_NUL_STRING); - break; - case NLA_BINARY: - err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE, - NLA_BINARY); - break; - } - - return err; -} - -static inline struct devlink_health_buffer_desc * -devlink_health_buffer_get_next_desc(struct devlink_health_buffer_desc *desc) -{ - return (void *)&desc->value + desc->len; -} - -static int -devlink_health_buffer_prepare_skb(struct sk_buff *skb, - struct devlink_health_buffer *buffer) -{ - struct devlink_health_buffer_desc *last_desc, *desc; - struct nlattr **buffer_nlattr; - int err; - int i = 0; - - buffer_nlattr = kcalloc(buffer->max_nested_depth, - sizeof(*buffer_nlattr), GFP_KERNEL); - if (!buffer_nlattr) - return -EINVAL; - - last_desc = devlink_health_buffer_get_desc_from_offset(buffer); - desc = buffer->data; - while (desc != last_desc) { - switch (desc->attrtype) { - case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT: - case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR: - case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE: - case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY: - buffer_nlattr[i] = nla_nest_start(skb, desc->attrtype); - if (!buffer_nlattr[i]) - goto nla_put_failure; - i++; - break; - case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA: - err = devlink_health_buffer_fill_data(skb, desc); - if (err) - goto nla_put_failure; - err = devlink_health_buffer_fill_type(skb, desc); - if (err) - goto nla_put_failure; - break; - case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME: - err = nla_put_string(skb, desc->attrtype, - (char *)&desc->value); - if (err) - goto nla_put_failure; - break; - default: - WARN_ON(!desc->nest_end); - WARN_ON(i <= 0); - if (desc->nest_end == DEVLINK_HEALTH_BUFFER_NEST_END) - nla_nest_end(skb, buffer_nlattr[--i]); - else - nla_nest_cancel(skb, buffer_nlattr[--i]); - break; - } - desc = devlink_health_buffer_get_next_desc(desc); - } - - return 0; - -nla_put_failure: - kfree(buffer_nlattr); - return err; -} - -static int -devlink_health_buffer_snd(struct genl_info *info, - enum devlink_command cmd, int flags, - struct devlink_health_buffer **buffers_array, - u64 num_of_buffers) -{ - struct sk_buff *skb; - struct nlmsghdr *nlh; - void *hdr; - int err; - u64 i; - - for (i = 0; i < num_of_buffers; i++) { - /* Skip buffer if driver did not fill it up with any data */ - if (!buffers_array[i]->offset) - continue; - - skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, - &devlink_nl_family, NLM_F_MULTI, cmd); - if (!hdr) - goto nla_put_failure; - - err = devlink_health_buffer_prepare_skb(skb, buffers_array[i]); - if (err) - goto nla_put_failure; - - genlmsg_end(skb, hdr); - err = genlmsg_reply(skb, info); - if (err) - return err; - } - - skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb) - return -ENOMEM; - nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, - NLMSG_DONE, 0, flags | NLM_F_MULTI); - err = genlmsg_reply(skb, info); - if (err) - return err; - - return 0; - -nla_put_failure: - err = -EIO; - nlmsg_free(skb); - return err; -} - -struct devlink_health_reporter { - struct list_head list; - struct devlink_health_buffer **dump_buffers_array; - struct mutex dump_lock; /* lock parallel read/write from dump buffers */ - struct devlink_health_buffer **diagnose_buffers_array; - struct mutex diagnose_lock; /* lock parallel read/write from diagnose buffers */ - void *priv; - const struct devlink_health_reporter_ops *ops; - struct devlink *devlink; - u64 graceful_period; - bool auto_recover; - u8 health_state; - u8 dump_avail; - u64 dump_ts; - u64 error_count; - u64 recovery_count; - u64 last_recovery_ts; -}; - -enum devlink_health_reporter_state { - DEVLINK_HEALTH_REPORTER_STATE_HEALTHY, - DEVLINK_HEALTH_REPORTER_STATE_ERROR, -}; - -void * -devlink_health_reporter_priv(struct devlink_health_reporter *reporter) -{ - return reporter->priv; -} -EXPORT_SYMBOL_GPL(devlink_health_reporter_priv); - -static struct devlink_health_reporter * -devlink_health_reporter_find_by_name(struct devlink *devlink, - const char *reporter_name) -{ - struct devlink_health_reporter *reporter; - - list_for_each_entry(reporter, &devlink->reporter_list, list) - if (!strcmp(reporter->ops->name, reporter_name)) - return reporter; - return NULL; -} - -/** - * devlink_health_reporter_create - create devlink health reporter - * - * @devlink: devlink - * @ops: ops - * @graceful_period: to avoid recovery loops, in msecs - * @auto_recover: auto recover when error occurs - * @priv: priv - */ -struct devlink_health_reporter * -devlink_health_reporter_create(struct devlink *devlink, - const struct devlink_health_reporter_ops *ops, - u64 graceful_period, bool auto_recover, - void *priv) -{ - struct devlink_health_reporter *reporter; - - mutex_lock(&devlink->lock); - if (devlink_health_reporter_find_by_name(devlink, ops->name)) { - reporter = ERR_PTR(-EEXIST); - goto unlock; - } - - if (WARN_ON(ops->dump && !ops->dump_size) || - WARN_ON(ops->diagnose && !ops->diagnose_size) || - WARN_ON(auto_recover && !ops->recover) || - WARN_ON(graceful_period && !ops->recover)) { - reporter = ERR_PTR(-EINVAL); - goto unlock; - } - - reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); - if (!reporter) { - reporter = ERR_PTR(-ENOMEM); - goto unlock; - } - - if (ops->dump) { - reporter->dump_buffers_array = - devlink_health_buffers_create(ops->dump_size); - if (!reporter->dump_buffers_array) { - kfree(reporter); - reporter = ERR_PTR(-ENOMEM); - goto unlock; - } - } - - if (ops->diagnose) { - reporter->diagnose_buffers_array = - devlink_health_buffers_create(ops->diagnose_size); - if (!reporter->diagnose_buffers_array) { - devlink_health_buffers_destroy(reporter->dump_buffers_array, - DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->dump_size)); - kfree(reporter); - reporter = ERR_PTR(-ENOMEM); - goto unlock; - } - } - - list_add_tail(&reporter->list, &devlink->reporter_list); - mutex_init(&reporter->dump_lock); - mutex_init(&reporter->diagnose_lock); - - reporter->priv = priv; - reporter->ops = ops; - reporter->devlink = devlink; - reporter->graceful_period = graceful_period; - reporter->auto_recover = auto_recover; -unlock: - mutex_unlock(&devlink->lock); - return reporter; -} -EXPORT_SYMBOL_GPL(devlink_health_reporter_create); - -/** - * devlink_health_reporter_destroy - destroy devlink health reporter - * - * @reporter: devlink health reporter to destroy - */ -void -devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) -{ - mutex_lock(&reporter->devlink->lock); - list_del(&reporter->list); - devlink_health_buffers_destroy(reporter->dump_buffers_array, - DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size)); - devlink_health_buffers_destroy(reporter->diagnose_buffers_array, - DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size)); - kfree(reporter); - mutex_unlock(&reporter->devlink->lock); -} -EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); - -static int -devlink_health_reporter_recover(struct devlink_health_reporter *reporter, - void *priv_ctx) -{ - int err; - - if (!reporter->ops->recover) - return -EOPNOTSUPP; - - err = reporter->ops->recover(reporter, priv_ctx); - if (err) - return err; - - reporter->recovery_count++; - reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; - reporter->last_recovery_ts = jiffies; - - return 0; -} - -static int devlink_health_do_dump(struct devlink_health_reporter *reporter, - void *priv_ctx) -{ - int err; - - if (!reporter->ops->dump) - return 0; - - if (reporter->dump_avail) - return 0; - - devlink_health_buffers_reset(reporter->dump_buffers_array, - DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size)); - err = reporter->ops->dump(reporter, reporter->dump_buffers_array, - DEVLINK_HEALTH_BUFFER_SIZE, - DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size), - priv_ctx); - if (!err) { - reporter->dump_avail = true; - reporter->dump_ts = jiffies; - } - - return err; -} - -int devlink_health_report(struct devlink_health_reporter *reporter, - const char *msg, void *priv_ctx) -{ - struct devlink *devlink = reporter->devlink; - int err = 0; - - /* write a log message of the current error */ - WARN_ON(!msg); - trace_devlink_health_report(devlink, reporter->ops->name, msg); - reporter->error_count++; - - /* abort if the previous error wasn't recovered */ - if (reporter->auto_recover && - (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY || - jiffies - reporter->last_recovery_ts < - msecs_to_jiffies(reporter->graceful_period))) { - trace_devlink_health_recover_aborted(devlink, - reporter->ops->name, - reporter->health_state, - jiffies - - reporter->last_recovery_ts); - return -ECANCELED; - } - - reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; - - mutex_lock(&reporter->dump_lock); - /* store current dump of current error, for later analysis */ - devlink_health_do_dump(reporter, priv_ctx); - mutex_unlock(&reporter->dump_lock); - - if (reporter->auto_recover) - err = devlink_health_reporter_recover(reporter, priv_ctx); - - return err; -} -EXPORT_SYMBOL_GPL(devlink_health_report); - -static struct devlink_health_reporter * -devlink_health_reporter_get_from_info(struct devlink *devlink, - struct genl_info *info) -{ - char *reporter_name; - - if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) - return NULL; - - reporter_name = - nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); - return devlink_health_reporter_find_by_name(devlink, reporter_name); -} - -static int -devlink_nl_health_reporter_fill(struct sk_buff *msg, - struct devlink *devlink, - struct devlink_health_reporter *reporter, - enum devlink_command cmd, u32 portid, - u32 seq, int flags) -{ - struct nlattr *reporter_attr; - void *hdr; - - hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); - if (!hdr) - return -EMSGSIZE; - - if (devlink_nl_put_handle(msg, devlink)) - goto genlmsg_cancel; - - reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER); - if (!reporter_attr) - goto genlmsg_cancel; - if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, - reporter->ops->name)) - goto reporter_nest_cancel; - if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, - reporter->health_state)) - goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR, - reporter->error_count, DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, - reporter->recovery_count, DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, - reporter->graceful_period, - DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, - reporter->auto_recover)) - goto reporter_nest_cancel; - if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL, - reporter->dump_avail)) - goto reporter_nest_cancel; - if (reporter->dump_avail && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, - jiffies_to_msecs(reporter->dump_ts), - DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - - nla_nest_end(msg, reporter_attr); - genlmsg_end(msg, hdr); - return 0; - -reporter_nest_cancel: - nla_nest_end(msg, reporter_attr); -genlmsg_cancel: - genlmsg_cancel(msg, hdr); - return -EMSGSIZE; -} - -static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink *devlink = info->user_ptr[0]; - struct devlink_health_reporter *reporter; - struct sk_buff *msg; - int err; - - reporter = devlink_health_reporter_get_from_info(devlink, info); - if (!reporter) - return -EINVAL; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - err = devlink_nl_health_reporter_fill(msg, devlink, reporter, - DEVLINK_CMD_HEALTH_REPORTER_GET, - info->snd_portid, info->snd_seq, - 0); - if (err) { - nlmsg_free(msg); - return err; - } - - return genlmsg_reply(msg, info); -} - -static int -devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb) -{ - struct devlink_health_reporter *reporter; - struct devlink *devlink; - int start = cb->args[0]; - int idx = 0; - int err; - - mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - continue; - mutex_lock(&devlink->lock); - list_for_each_entry(reporter, &devlink->reporter_list, - list) { - if (idx < start) { - idx++; - continue; - } - err = devlink_nl_health_reporter_fill(msg, devlink, - reporter, - DEVLINK_CMD_HEALTH_REPORTER_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); - if (err) { - mutex_unlock(&devlink->lock); - goto out; - } - idx++; - } - mutex_unlock(&devlink->lock); - } -out: - mutex_unlock(&devlink_mutex); - - cb->args[0] = idx; - return msg->len; -} - -static int -devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink *devlink = info->user_ptr[0]; - struct devlink_health_reporter *reporter; - - reporter = devlink_health_reporter_get_from_info(devlink, info); - if (!reporter) - return -EINVAL; - - if (!reporter->ops->recover && - (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] || - info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) - return -EINVAL; - - if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) - reporter->graceful_period = - nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]); - - if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]) - reporter->auto_recover = - nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]); - - return 0; -} - -static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink *devlink = info->user_ptr[0]; - struct devlink_health_reporter *reporter; - - reporter = devlink_health_reporter_get_from_info(devlink, info); - if (!reporter) - return -EINVAL; - - return devlink_health_reporter_recover(reporter, NULL); -} - -static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink *devlink = info->user_ptr[0]; - struct devlink_health_reporter *reporter; - u64 num_of_buffers; - int err; - - reporter = devlink_health_reporter_get_from_info(devlink, info); - if (!reporter) - return -EINVAL; - - if (!reporter->ops->diagnose) - return -EOPNOTSUPP; - - num_of_buffers = - DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size); - - mutex_lock(&reporter->diagnose_lock); - devlink_health_buffers_reset(reporter->diagnose_buffers_array, - num_of_buffers); - - err = reporter->ops->diagnose(reporter, - reporter->diagnose_buffers_array, - DEVLINK_HEALTH_BUFFER_SIZE, - num_of_buffers); - if (err) - goto out; - - err = devlink_health_buffer_snd(info, - DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, - 0, reporter->diagnose_buffers_array, - num_of_buffers); - if (err) - goto out; - - mutex_unlock(&reporter->diagnose_lock); - return 0; - -out: - mutex_unlock(&reporter->diagnose_lock); - return err; -} - -static void -devlink_health_dump_clear(struct devlink_health_reporter *reporter) -{ - reporter->dump_avail = false; - reporter->dump_ts = 0; - devlink_health_buffers_reset(reporter->dump_buffers_array, - DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size)); -} - -static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink *devlink = info->user_ptr[0]; - struct devlink_health_reporter *reporter; - u64 num_of_buffers; - int err; - - reporter = devlink_health_reporter_get_from_info(devlink, info); - if (!reporter) - return -EINVAL; - - if (!reporter->ops->dump) - return -EOPNOTSUPP; - - num_of_buffers = - DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size); - - mutex_lock(&reporter->dump_lock); - err = devlink_health_do_dump(reporter, NULL); - if (err) - goto out; - - err = devlink_health_buffer_snd(info, - DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, - 0, reporter->dump_buffers_array, - num_of_buffers); - -out: - mutex_unlock(&reporter->dump_lock); - return err; -} - -static int -devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink *devlink = info->user_ptr[0]; - struct devlink_health_reporter *reporter; - - reporter = devlink_health_reporter_get_from_info(devlink, info); - if (!reporter) - return -EINVAL; - - mutex_lock(&reporter->dump_lock); - devlink_health_dump_clear(reporter); - mutex_unlock(&reporter->dump_lock); - return 0; -} - static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -4628,9 +3739,6 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, - [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, - [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -4829,6 +3937,21 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_PORT_PARAM_GET, + .doit = devlink_nl_cmd_port_param_get_doit, + .dumpit = devlink_nl_cmd_port_param_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_PORT_PARAM_SET, + .doit = devlink_nl_cmd_port_param_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, + }, { .cmd = DEVLINK_CMD_REGION_GET, .doit = devlink_nl_cmd_region_get_doit, @@ -4851,51 +3974,6 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET, - .doit = devlink_nl_cmd_health_reporter_get_doit, - .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, - .policy = devlink_nl_policy, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, - /* can be retrieved by unprivileged users */ - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, - .doit = devlink_nl_cmd_health_reporter_set_doit, - .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, - .doit = devlink_nl_cmd_health_reporter_recover_doit, - .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, - .doit = devlink_nl_cmd_health_reporter_diagnose_doit, - .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, - .doit = devlink_nl_cmd_health_reporter_dump_get_doit, - .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NO_LOCK, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, - .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, - .policy = devlink_nl_policy, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | - DEVLINK_NL_FLAG_NO_LOCK, - }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -4936,7 +4014,6 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) INIT_LIST_HEAD(&devlink->resource_list); INIT_LIST_HEAD(&devlink->param_list); INIT_LIST_HEAD(&devlink->region_list); - INIT_LIST_HEAD(&devlink->reporter_list); mutex_init(&devlink->lock); return devlink; } @@ -5009,6 +4086,7 @@ int devlink_port_register(struct devlink *devlink, devlink_port->index = port_index; devlink_port->registered = true; list_add_tail(&devlink_port->list, &devlink->port_list); + INIT_LIST_HEAD(&devlink_port->param_list); mutex_unlock(&devlink->lock); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); return 0; @@ -5526,6 +4604,71 @@ out: } EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister); +static int devlink_param_verify(const struct devlink_param *param) +{ + if (!param || !param->name || !param->supported_cmodes) + return -EINVAL; + if (param->generic) + return devlink_param_generic_verify(param); + else + return devlink_param_driver_verify(param); +} + +static int __devlink_params_register(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *params, + size_t params_count, + enum devlink_command reg_cmd, + enum devlink_command unreg_cmd) +{ + const struct devlink_param *param = params; + int i; + int err; + + mutex_lock(&devlink->lock); + for (i = 0; i < params_count; i++, param++) { + err = devlink_param_verify(param); + if (err) + goto rollback; + + err = devlink_param_register_one(devlink, port_index, + param_list, param, reg_cmd); + if (err) + goto rollback; + } + + mutex_unlock(&devlink->lock); + return 0; + +rollback: + if (!i) + goto unlock; + for (param--; i > 0; i--, param--) + devlink_param_unregister_one(devlink, port_index, param_list, + param, unreg_cmd); +unlock: + mutex_unlock(&devlink->lock); + return err; +} + +static void __devlink_params_unregister(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *params, + size_t params_count, + enum devlink_command cmd) +{ + const struct devlink_param *param = params; + int i; + + mutex_lock(&devlink->lock); + for (i = 0; i < params_count; i++, param++) + devlink_param_unregister_one(devlink, 0, param_list, param, + cmd); + mutex_unlock(&devlink->lock); +} + /** * devlink_params_register - register configuration parameters * @@ -5539,41 +4682,10 @@ int devlink_params_register(struct devlink *devlink, const struct devlink_param *params, size_t params_count) { - const struct devlink_param *param = params; - int i; - int err; - - mutex_lock(&devlink->lock); - for (i = 0; i < params_count; i++, param++) { - if (!param || !param->name || !param->supported_cmodes) { - err = -EINVAL; - goto rollback; - } - if (param->generic) { - err = devlink_param_generic_verify(param); - if (err) - goto rollback; - } else { - err = devlink_param_driver_verify(param); - if (err) - goto rollback; - } - err = devlink_param_register_one(devlink, param); - if (err) - goto rollback; - } - - mutex_unlock(&devlink->lock); - return 0; - -rollback: - if (!i) - goto unlock; - for (param--; i > 0; i--, param--) - devlink_param_unregister_one(devlink, param); -unlock: - mutex_unlock(&devlink->lock); - return err; + return __devlink_params_register(devlink, 0, &devlink->param_list, + params, params_count, + DEVLINK_CMD_PARAM_NEW, + DEVLINK_CMD_PARAM_DEL); } EXPORT_SYMBOL_GPL(devlink_params_register); @@ -5587,16 +4699,104 @@ void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count) { - const struct devlink_param *param = params; - int i; - - mutex_lock(&devlink->lock); - for (i = 0; i < params_count; i++, param++) - devlink_param_unregister_one(devlink, param); - mutex_unlock(&devlink->lock); + return __devlink_params_unregister(devlink, 0, &devlink->param_list, + params, params_count, + DEVLINK_CMD_PARAM_DEL); } EXPORT_SYMBOL_GPL(devlink_params_unregister); +/** + * devlink_port_params_register - register port configuration parameters + * + * @devlink_port: devlink port + * @params: configuration parameters array + * @params_count: number of parameters provided + * + * Register the configuration parameters supported by the port. + */ +int devlink_port_params_register(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count) +{ + return __devlink_params_register(devlink_port->devlink, + devlink_port->index, + &devlink_port->param_list, params, + params_count, + DEVLINK_CMD_PORT_PARAM_NEW, + DEVLINK_CMD_PORT_PARAM_DEL); +} +EXPORT_SYMBOL_GPL(devlink_port_params_register); + +/** + * devlink_port_params_unregister - unregister port configuration + * parameters + * + * @devlink_port: devlink port + * @params: configuration parameters array + * @params_count: number of parameters provided + */ +void devlink_port_params_unregister(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count) +{ + return __devlink_params_unregister(devlink_port->devlink, + devlink_port->index, + &devlink_port->param_list, + params, params_count, + DEVLINK_CMD_PORT_PARAM_DEL); +} +EXPORT_SYMBOL_GPL(devlink_port_params_unregister); + +static int +__devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id, + union devlink_param_value *init_val) +{ + struct devlink_param_item *param_item; + + param_item = devlink_param_find_by_id(param_list, param_id); + if (!param_item) + return -EINVAL; + + if (!param_item->driverinit_value_valid || + !devlink_param_cmode_is_supported(param_item->param, + DEVLINK_PARAM_CMODE_DRIVERINIT)) + return -EOPNOTSUPP; + + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(init_val->vstr, param_item->driverinit_value.vstr); + else + *init_val = param_item->driverinit_value; + + return 0; +} + +static int +__devlink_param_driverinit_value_set(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, u32 param_id, + union devlink_param_value init_val, + enum devlink_command cmd) +{ + struct devlink_param_item *param_item; + + param_item = devlink_param_find_by_id(param_list, param_id); + if (!param_item) + return -EINVAL; + + if (!devlink_param_cmode_is_supported(param_item->param, + DEVLINK_PARAM_CMODE_DRIVERINIT)) + return -EOPNOTSUPP; + + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(param_item->driverinit_value.vstr, init_val.vstr); + else + param_item->driverinit_value = init_val; + param_item->driverinit_value_valid = true; + + devlink_param_notify(devlink, port_index, param_item, cmd); + return 0; +} + /** * devlink_param_driverinit_value_get - get configuration parameter * value for driver initializing @@ -5611,26 +4811,11 @@ EXPORT_SYMBOL_GPL(devlink_params_unregister); int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *init_val) { - struct devlink_param_item *param_item; - if (!devlink->ops || !devlink->ops->reload) return -EOPNOTSUPP; - param_item = devlink_param_find_by_id(&devlink->param_list, param_id); - if (!param_item) - return -EINVAL; - - if (!param_item->driverinit_value_valid || - !devlink_param_cmode_is_supported(param_item->param, - DEVLINK_PARAM_CMODE_DRIVERINIT)) - return -EOPNOTSUPP; - - if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) - strcpy(init_val->vstr, param_item->driverinit_value.vstr); - else - *init_val = param_item->driverinit_value; - - return 0; + return __devlink_param_driverinit_value_get(&devlink->param_list, + param_id, init_val); } EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get); @@ -5649,27 +4834,62 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get); int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val) { - struct devlink_param_item *param_item; - - param_item = devlink_param_find_by_id(&devlink->param_list, param_id); - if (!param_item) - return -EINVAL; - - if (!devlink_param_cmode_is_supported(param_item->param, - DEVLINK_PARAM_CMODE_DRIVERINIT)) - return -EOPNOTSUPP; - - if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) - strcpy(param_item->driverinit_value.vstr, init_val.vstr); - else - param_item->driverinit_value = init_val; - param_item->driverinit_value_valid = true; - - devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW); - return 0; + return __devlink_param_driverinit_value_set(devlink, 0, + &devlink->param_list, + param_id, init_val, + DEVLINK_CMD_PARAM_NEW); } EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set); +/** + * devlink_port_param_driverinit_value_get - get configuration parameter + * value for driver initializing + * + * @devlink_port: devlink_port + * @param_id: parameter ID + * @init_val: value of parameter in driverinit configuration mode + * + * This function should be used by the driver to get driverinit + * configuration for initialization after reload command. + */ +int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, + u32 param_id, + union devlink_param_value *init_val) +{ + struct devlink *devlink = devlink_port->devlink; + + if (!devlink->ops || !devlink->ops->reload) + return -EOPNOTSUPP; + + return __devlink_param_driverinit_value_get(&devlink_port->param_list, + param_id, init_val); +} +EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_get); + +/** + * devlink_port_param_driverinit_value_set - set value of configuration + * parameter for driverinit + * configuration mode + * + * @devlink_port: devlink_port + * @param_id: parameter ID + * @init_val: value of parameter to set for driverinit configuration mode + * + * This function should be used by the driver to set driverinit + * configuration mode default value. + */ +int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port, + u32 param_id, + union devlink_param_value init_val) +{ + return __devlink_param_driverinit_value_set(devlink_port->devlink, + devlink_port->index, + &devlink_port->param_list, + param_id, init_val, + DEVLINK_CMD_PORT_PARAM_NEW); +} +EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_set); + /** * devlink_param_value_changed - notify devlink on a parameter's value * change. Should be called by the driver @@ -5681,7 +4901,6 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set); * This function should be used by the driver to notify devlink on value * change, excluding driverinit configuration mode. * For driverinit configuration mode driver should use the function - * devlink_param_driverinit_value_set() instead. */ void devlink_param_value_changed(struct devlink *devlink, u32 param_id) { @@ -5690,10 +4909,37 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id) param_item = devlink_param_find_by_id(&devlink->param_list, param_id); WARN_ON(!param_item); - devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW); + devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); } EXPORT_SYMBOL_GPL(devlink_param_value_changed); +/** + * devlink_port_param_value_changed - notify devlink on a parameter's value + * change. Should be called by the driver + * right after the change. + * + * @devlink_port: devlink_port + * @param_id: parameter ID + * + * This function should be used by the driver to notify devlink on value + * change, excluding driverinit configuration mode. + * For driverinit configuration mode driver should use the function + * devlink_port_param_driverinit_value_set() instead. + */ +void devlink_port_param_value_changed(struct devlink_port *devlink_port, + u32 param_id) +{ + struct devlink_param_item *param_item; + + param_item = devlink_param_find_by_id(&devlink_port->param_list, + param_id); + WARN_ON(!param_item); + + devlink_param_notify(devlink_port->devlink, devlink_port->index, + param_item, DEVLINK_CMD_PORT_PARAM_NEW); +} +EXPORT_SYMBOL_GPL(devlink_port_param_value_changed); + /** * devlink_param_value_str_fill - Safely fill-up the string preventing * from overflow of the preallocated buffer diff --git a/net/core/filter.c b/net/core/filter.c index 2b3b436ef545..41984ad4b9b4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2020,18 +2020,19 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, u32 flags) { - /* skb->mac_len is not set on normal egress */ - unsigned int mlen = skb->network_header - skb->mac_header; + unsigned int mlen = skb_network_offset(skb); - __skb_pull(skb, mlen); + if (mlen) { + __skb_pull(skb, mlen); - /* At ingress, the mac header has already been pulled once. - * At egress, skb_pospull_rcsum has to be done in case that - * the skb is originated from ingress (i.e. a forwarded skb) - * to ensure that rcsum starts at net header. - */ - if (!skb_at_tc_ingress(skb)) - skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + /* At ingress, the mac header has already been pulled once. + * At egress, skb_pospull_rcsum has to be done in case that + * the skb is originated from ingress (i.e. a forwarded skb) + * to ensure that rcsum starts at net header. + */ + if (!skb_at_tc_ingress(skb)) + skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + } skb_pop_mac_header(skb); skb_reset_mac_len(skb); return flags & BPF_F_INGRESS ? @@ -4119,6 +4120,10 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break; case SO_MAX_PACING_RATE: /* 32bit version */ + if (val != ~0U) + cmpxchg(&sk->sk_pacing_status, + SK_PACING_NONE, + SK_PACING_NEEDED); sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val; sk->sk_pacing_rate = min(sk->sk_pacing_rate, sk->sk_max_pacing_rate); @@ -4132,7 +4137,10 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_rcvlowat = val ? : 1; break; case SO_MARK: - sk->sk_mark = val; + if (sk->sk_mark != val) { + sk->sk_mark = val; + sk_dst_reset(sk); + } break; default: ret = -EINVAL; @@ -5309,7 +5317,7 @@ bpf_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_trace_printk: if (capable(CAP_SYS_ADMIN)) return bpf_get_trace_printk_proto(); - /* else: fall through */ + /* else, fall through */ default: return NULL; } @@ -6700,6 +6708,27 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, target_size)); break; + case offsetof(struct __sk_buff, gso_segs): + /* si->dst_reg = skb_shinfo(SKB); */ +#ifdef NET_SKBUFF_DATA_USES_OFFSET + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, head)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + BPF_REG_AX, si->src_reg, + offsetof(struct sk_buff, end)); + *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); +#else + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, end)); +#endif + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs), + si->dst_reg, si->dst_reg, + bpf_target_off(struct skb_shared_info, + gso_segs, 2, + target_size)); + break; case offsetof(struct __sk_buff, wire_len): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4); @@ -7690,6 +7719,7 @@ const struct bpf_verifier_ops flow_dissector_verifier_ops = { }; const struct bpf_prog_ops flow_dissector_prog_ops = { + .test_run = bpf_prog_test_run_flow_dissector, }; int sk_detach_filter(struct sock *sk) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 9f2840510e63..bb1a54747d64 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -683,6 +683,46 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, } } +bool __skb_flow_bpf_dissect(struct bpf_prog *prog, + const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + struct bpf_flow_keys *flow_keys) +{ + struct bpf_skb_data_end cb_saved; + struct bpf_skb_data_end *cb; + u32 result; + + /* Note that even though the const qualifier is discarded + * throughout the execution of the BPF program, all changes(the + * control block) are reverted after the BPF program returns. + * Therefore, __skb_flow_dissect does not alter the skb. + */ + + cb = (struct bpf_skb_data_end *)skb->cb; + + /* Save Control Block */ + memcpy(&cb_saved, cb, sizeof(cb_saved)); + memset(cb, 0, sizeof(*cb)); + + /* Pass parameters to the BPF program */ + memset(flow_keys, 0, sizeof(*flow_keys)); + cb->qdisc_cb.flow_keys = flow_keys; + flow_keys->nhoff = skb_network_offset(skb); + flow_keys->thoff = flow_keys->nhoff; + + bpf_compute_data_pointers((struct sk_buff *)skb); + result = BPF_PROG_RUN(prog, skb); + + /* Restore state */ + memcpy(cb, &cb_saved, sizeof(cb_saved)); + + flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len); + flow_keys->thoff = clamp_t(u16, flow_keys->thoff, + flow_keys->nhoff, skb->len); + + return result == BPF_OK; +} + /** * __skb_flow_dissect - extract the flow_keys struct and return it * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified @@ -714,7 +754,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_vlan *key_vlan; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; - struct bpf_prog *attached = NULL; int num_hdrs = 0; u8 ip_proto = 0; bool ret; @@ -754,53 +793,30 @@ bool __skb_flow_dissect(const struct sk_buff *skb, FLOW_DISSECTOR_KEY_BASIC, target_container); - rcu_read_lock(); if (skb) { + struct bpf_flow_keys flow_keys; + struct bpf_prog *attached = NULL; + + rcu_read_lock(); + if (skb->dev) attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog); else if (skb->sk) attached = rcu_dereference(sock_net(skb->sk)->flow_dissector_prog); else WARN_ON_ONCE(1); - } - if (attached) { - /* Note that even though the const qualifier is discarded - * throughout the execution of the BPF program, all changes(the - * control block) are reverted after the BPF program returns. - * Therefore, __skb_flow_dissect does not alter the skb. - */ - struct bpf_flow_keys flow_keys = {}; - struct bpf_skb_data_end cb_saved; - struct bpf_skb_data_end *cb; - u32 result; - cb = (struct bpf_skb_data_end *)skb->cb; - - /* Save Control Block */ - memcpy(&cb_saved, cb, sizeof(cb_saved)); - memset(cb, 0, sizeof(cb_saved)); - - /* Pass parameters to the BPF program */ - cb->qdisc_cb.flow_keys = &flow_keys; - flow_keys.nhoff = nhoff; - flow_keys.thoff = nhoff; - - bpf_compute_data_pointers((struct sk_buff *)skb); - result = BPF_PROG_RUN(attached, skb); - - /* Restore state */ - memcpy(cb, &cb_saved, sizeof(cb_saved)); - - flow_keys.nhoff = clamp_t(u16, flow_keys.nhoff, 0, skb->len); - flow_keys.thoff = clamp_t(u16, flow_keys.thoff, - flow_keys.nhoff, skb->len); - - __skb_flow_bpf_to_target(&flow_keys, flow_dissector, - target_container); + if (attached) { + ret = __skb_flow_bpf_dissect(attached, skb, + flow_dissector, + &flow_keys); + __skb_flow_bpf_to_target(&flow_keys, flow_dissector, + target_container); + rcu_read_unlock(); + return ret; + } rcu_read_unlock(); - return result == BPF_OK; } - rcu_read_unlock(); if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 3e85437f7106..a648568c5e8f 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -63,6 +63,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, lwt->name ? : ""); ret = BPF_OK; } else { + skb_reset_mac_header(skb); ret = skb_do_redirect(skb); if (ret == 0) ret = BPF_REDIRECT; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3e27a779f288..4230400b9a30 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -450,7 +450,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); - kmemleak_alloc(buckets, size, 0, GFP_ATOMIC); + kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); } if (!buckets) { kfree(ret); @@ -1007,7 +1007,7 @@ static void neigh_probe(struct neighbour *neigh) if (neigh->ops->solicit) neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); - kfree_skb(skb); + consume_skb(skb); } /* Called when a timer expires for a neighbour entry. */ diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b02fb19df2cc..17f36317363d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -778,6 +778,41 @@ nla_put_failure: return -EMSGSIZE; } +static int rtnl_net_valid_getid_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETNSA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETNSA_PID: + case NETNSA_FD: + case NETNSA_NSID: + case NETNSA_TARGET_NSID: + break; + default: + NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request"); + return -EINVAL; + } + } + + return 0; +} + static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -793,8 +828,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, struct sk_buff *msg; int err; - err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, - rtnl_net_policy, extack); + err = rtnl_net_valid_getid_req(skb, nlh, tb, extack); if (err < 0) return err; if (tb[NETNSA_PID]) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b302df0cd5ae..f5a98082ac7a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3242,6 +3242,53 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, return ret; } +static int rtnl_valid_getlink_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifinfomsg *ifm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + NL_SET_ERR_MSG(extack, "Invalid header for get link"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, + extack); + + ifm = nlmsg_data(nlh); + if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || + ifm->ifi_change) { + NL_SET_ERR_MSG(extack, "Invalid values in header for get link request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, + extack); + if (err) + return err; + + for (i = 0; i <= IFLA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFLA_IFNAME: + case IFLA_EXT_MASK: + case IFLA_TARGET_NETNSID: + break; + default: + NL_SET_ERR_MSG(extack, "Unsupported attribute in get link request"); + return -EINVAL; + } + } + + return 0; +} + static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -3256,7 +3303,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, int err; u32 ext_filter_mask = 0; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); + err = rtnl_valid_getlink_req(skb, nlh, tb, extack); if (err < 0) return err; @@ -4902,6 +4949,40 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, return size; } +static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, + bool is_dump, struct netlink_ext_ack *extack) +{ + struct if_stats_msg *ifsm; + + if (nlh->nlmsg_len < sizeof(*ifsm)) { + NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); + return -EINVAL; + } + + if (!strict_check) + return 0; + + ifsm = nlmsg_data(nlh); + + /* only requests using strict checks can pass data to influence + * the dump. The legacy exception is filter_mask. + */ + if (ifsm->pad1 || ifsm->pad2 || (is_dump && ifsm->ifindex)) { + NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request"); + return -EINVAL; + } + if (nlmsg_attrlen(nlh, sizeof(*ifsm))) { + NL_SET_ERR_MSG(extack, "Invalid attributes after stats header"); + return -EINVAL; + } + if (ifsm->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) { + NL_SET_ERR_MSG(extack, "Invalid stats requested through filter mask"); + return -EINVAL; + } + + return 0; +} + static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -4913,8 +4994,10 @@ static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, u32 filter_mask; int err; - if (nlmsg_len(nlh) < sizeof(*ifsm)) - return -EINVAL; + err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb), + false, extack); + if (err) + return err; ifsm = nlmsg_data(nlh); if (ifsm->ifindex > 0) @@ -4966,27 +5049,11 @@ static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->seq = net->dev_base_seq; - if (nlmsg_len(cb->nlh) < sizeof(*ifsm)) { - NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); - return -EINVAL; - } + err = rtnl_valid_stats_req(cb->nlh, cb->strict_check, true, extack); + if (err) + return err; ifsm = nlmsg_data(cb->nlh); - - /* only requests using strict checks can pass data to influence - * the dump. The legacy exception is filter_mask. - */ - if (cb->strict_check) { - if (ifsm->pad1 || ifsm->pad2 || ifsm->ifindex) { - NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request"); - return -EINVAL; - } - if (nlmsg_attrlen(cb->nlh, sizeof(*ifsm))) { - NL_SET_ERR_MSG(extack, "Invalid attributes after stats header"); - return -EINVAL; - } - } - filter_mask = ifsm->filter_mask; if (!filter_mask) { NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump"); diff --git a/net/core/sock.c b/net/core/sock.c index b53764ebb973..900e8a9435f5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1752,7 +1752,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_err_soft = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); - atomic64_set(&newsk->sk_cookie, 0); if (likely(newsk->sk_net_refcnt)) sock_inuse_add(sock_net(newsk), 1); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d67ec17f2cc8..84bf2861f45f 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -36,6 +36,15 @@ static int net_msg_warn; /* Unused, but still a sysctl */ int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0; EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); +/* 0 - Keep current behavior: + * IPv4: inherit all current settings from init_net + * IPv6: reset all settings to default + * 1 - Both inherit all current settings from init_net + * 2 - Both reset all settings to default + */ +int sysctl_devconf_inherit_init_net __read_mostly; +EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); + #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -544,6 +553,15 @@ static struct ctl_table net_core_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "devconf_inherit_init_net", + .data = &sysctl_devconf_inherit_init_net, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, + }, { } }; diff --git a/net/dccp/input.c b/net/dccp/input.c index 85d6c879383d..8d03707abdac 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -480,7 +480,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); } - if (sk->sk_write_pending || icsk->icsk_ack.pingpong || + if (sk->sk_write_pending || inet_csk_in_pingpong_mode(sk) || icsk->icsk_accept_queue.rskq_defer_accept) { /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 1501a20a94ca..74e138495d67 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -199,7 +199,7 @@ static void dccp_delack_timer(struct timer_list *t) icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; if (inet_csk_ack_scheduled(sk)) { - if (!icsk->icsk_ack.pingpong) { + if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); @@ -207,7 +207,7 @@ static void dccp_delack_timer(struct timer_list *t) /* Delayed ACK missed: leave pingpong mode and * deflate ATO. */ - icsk->icsk_ack.pingpong = 0; + inet_csk_exit_pingpong_mode(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; } dccp_send_ack(sk); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index d0b3e69c6b39..0962f9201baa 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -56,7 +56,7 @@ #include #include -#define DN_IFREQ_SIZE (sizeof(struct ifreq) - sizeof(struct sockaddr) + sizeof(struct sockaddr_dn)) +#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn)) static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00}; static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00}; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e258a00b4a3d..cd9033245b98 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2063,13 +2063,49 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; +static int inet_netconf_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { + NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv4_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv4_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETCONFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETCONFA_IFINDEX: + break; + default: + NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request"); + return -EINVAL; + } + } + + return 0; +} + static int inet_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; - struct netconfmsg *ncm; struct sk_buff *skb; struct ipv4_devconf *devconf; struct in_device *in_dev; @@ -2077,9 +2113,8 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, int ifindex; int err; - err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_ipv4_policy, extack); - if (err < 0) + err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack); + if (err) goto errout; err = -EINVAL; @@ -2556,32 +2591,32 @@ static __net_init int devinet_init_net(struct net *net) int err; struct ipv4_devconf *all, *dflt; #ifdef CONFIG_SYSCTL - struct ctl_table *tbl = ctl_forward_entry; + struct ctl_table *tbl; struct ctl_table_header *forw_hdr; #endif err = -ENOMEM; - all = &ipv4_devconf; - dflt = &ipv4_devconf_dflt; + all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL); + if (!all) + goto err_alloc_all; - if (!net_eq(net, &init_net)) { - all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); - if (!all) - goto err_alloc_all; - - dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); - if (!dflt) - goto err_alloc_dflt; + dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL); + if (!dflt) + goto err_alloc_dflt; #ifdef CONFIG_SYSCTL - tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL); - if (!tbl) - goto err_alloc_ctl; + tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL); + if (!tbl) + goto err_alloc_ctl; - tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; - tbl[0].extra1 = all; - tbl[0].extra2 = net; + tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1]; + tbl[0].extra1 = all; + tbl[0].extra2 = net; #endif + + if (sysctl_devconf_inherit_init_net != 2 && !net_eq(net, &init_net)) { + memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf)); + memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt)); } #ifdef CONFIG_SYSCTL @@ -2611,15 +2646,12 @@ err_reg_ctl: err_reg_dflt: __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL); err_reg_all: - if (tbl != ctl_forward_entry) - kfree(tbl); + kfree(tbl); err_alloc_ctl: #endif - if (dflt != &ipv4_devconf_dflt) - kfree(dflt); + kfree(dflt); err_alloc_dflt: - if (all != &ipv4_devconf) - kfree(all); + kfree(all); err_alloc_all: return err; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 6df95be96311..fe4f6a624238 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -203,7 +203,7 @@ static void fib_flush(struct net *net) struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) - flushed += fib_table_flush(net, tb); + flushed += fib_table_flush(net, tb, false); } if (flushed) @@ -1463,7 +1463,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); - fib_table_flush(net, tb); + fib_table_flush(net, tb, true); fib_free_table(tb); } } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 237c9f72b265..a573e37e0615 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1856,7 +1856,7 @@ void fib_table_flush_external(struct fib_table *tb) } /* Caller must hold RTNL. */ -int fib_table_flush(struct net *net, struct fib_table *tb) +int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; @@ -1904,8 +1904,17 @@ int fib_table_flush(struct net *net, struct fib_table *tb) hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; - if (!fi || !(fi->fib_flags & RTNH_F_DEAD) || - tb->tb_id != fa->tb_id) { + if (!fi || tb->tb_id != fa->tb_id || + (!(fi->fib_flags & RTNH_F_DEAD) && + !fib_props[fa->fa_type].error)) { + slen = fa->fa_slen; + continue; + } + + /* Do not flush error routes if network namespace is + * not being dismantled + */ + if (!flush_all && fib_props[fa->fa_type].error) { slen = fa->fa_slen; continue; } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 632863541082..437070d1ffb1 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -1020,10 +1020,11 @@ static int gue_err(struct sk_buff *skb, u32 info) { int transport_offset = skb_transport_offset(skb); struct guehdr *guehdr; - size_t optlen; + size_t len, optlen; int ret; - if (skb->len < sizeof(struct udphdr) + sizeof(struct guehdr)) + len = sizeof(struct udphdr) + sizeof(struct guehdr); + if (!pskb_may_pull(skb, len)) return -EINVAL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; @@ -1058,6 +1059,10 @@ static int gue_err(struct sk_buff *skb, u32 info) optlen = guehdr->hlen << 2; + if (!pskb_may_pull(skb, len + optlen)) + return -EINVAL; + + guehdr = (struct guehdr *)&udp_hdr(skb)[1]; if (validate_gue_flags(guehdr, optlen)) return -EINVAL; diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index a4bf22ee3aed..7c4a41dc04bb 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -119,6 +120,22 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, hdr_len += 4; } tpi->hdr_len = hdr_len; + + /* ERSPAN ver 1 and 2 protocol sets GRE key field + * to 0 and sets the configured key in the + * inner erspan header field + */ + if (greh->protocol == htons(ETH_P_ERSPAN) || + greh->protocol == htons(ETH_P_ERSPAN2)) { + struct erspan_base_hdr *ershdr; + + if (!pskb_may_pull(skb, nhs + hdr_len + sizeof(*ershdr))) + return -EINVAL; + + ershdr = (struct erspan_base_hdr *)options; + tpi->key = cpu_to_be32(get_session_id(ershdr)); + } + return hdr_len; } EXPORT_SYMBOL(gre_parse_header); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 765b2b32c4a4..a40e48ded10d 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1493,22 +1493,22 @@ static int ip_mc_check_igmp_reportv3(struct sk_buff *skb) len += sizeof(struct igmpv3_report); - return pskb_may_pull(skb, len) ? 0 : -EINVAL; + return ip_mc_may_pull(skb, len) ? 0 : -EINVAL; } static int ip_mc_check_igmp_query(struct sk_buff *skb) { - unsigned int len = skb_transport_offset(skb); - - len += sizeof(struct igmphdr); - if (skb->len < len) - return -EINVAL; + unsigned int transport_len = ip_transport_len(skb); + unsigned int len; /* IGMPv{1,2}? */ - if (skb->len != len) { + if (transport_len != sizeof(struct igmphdr)) { /* or IGMPv3? */ - len += sizeof(struct igmpv3_query) - sizeof(struct igmphdr); - if (skb->len < len || !pskb_may_pull(skb, len)) + if (transport_len < sizeof(struct igmpv3_query)) + return -EINVAL; + + len = skb_transport_offset(skb) + sizeof(struct igmpv3_query); + if (!ip_mc_may_pull(skb, len)) return -EINVAL; } @@ -1544,47 +1544,29 @@ static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb) return skb_checksum_simple_validate(skb); } -static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) - +static int ip_mc_check_igmp_csum(struct sk_buff *skb) { - struct sk_buff *skb_chk; - unsigned int transport_len; unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr); - int ret = -EINVAL; + unsigned int transport_len = ip_transport_len(skb); + struct sk_buff *skb_chk; - transport_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb); + if (!ip_mc_may_pull(skb, len)) + return -EINVAL; skb_chk = skb_checksum_trimmed(skb, transport_len, ip_mc_validate_checksum); if (!skb_chk) - goto err; + return -EINVAL; - if (!pskb_may_pull(skb_chk, len)) - goto err; - - ret = ip_mc_check_igmp_msg(skb_chk); - if (ret) - goto err; - - if (skb_trimmed) - *skb_trimmed = skb_chk; - /* free now unneeded clone */ - else if (skb_chk != skb) + if (skb_chk != skb) kfree_skb(skb_chk); - ret = 0; - -err: - if (ret && skb_chk && skb_chk != skb) - kfree_skb(skb_chk); - - return ret; + return 0; } /** * ip_mc_check_igmp - checks whether this is a sane IGMP packet * @skb: the skb to validate - * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional) * * Checks whether an IPv4 packet is a valid IGMP packet. If so sets * skb transport header accordingly and returns zero. @@ -1594,18 +1576,10 @@ err: * -ENOMSG: IP header validation succeeded but it is not an IGMP packet. * -ENOMEM: A memory allocation failure happened. * - * Optionally, an skb pointer might be provided via skb_trimmed (or set it - * to NULL): After parsing an IGMP packet successfully it will point to - * an skb which has its tail aligned to the IP packet end. This might - * either be the originally provided skb or a trimmed, cloned version if - * the skb frame had data beyond the IP packet. A cloned skb allows us - * to leave the original skb and its full frame unchanged (which might be - * desirable for layer 2 frame jugglers). - * * Caller needs to set the skb network header and free any returned skb if it * differs from the provided skb. */ -int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) +int ip_mc_check_igmp(struct sk_buff *skb) { int ret = ip_mc_check_iphdr(skb); @@ -1615,7 +1589,11 @@ int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed) if (ip_hdr(skb)->protocol != IPPROTO_IGMP) return -ENOMSG; - return __ip_mc_check_igmp(skb, skb_trimmed); + ret = ip_mc_check_igmp_csum(skb); + if (ret < 0) + return ret; + + return ip_mc_check_igmp_msg(skb); } EXPORT_SYMBOL(ip_mc_check_igmp); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 760a9e52e02b..9f69411251d0 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -25,6 +25,62 @@ #include #include #include +#include +#include + +/* Use skb->cb to track consecutive/adjacent fragments coming at + * the end of the queue. Nodes in the rb-tree queue will + * contain "runs" of one or more adjacent fragments. + * + * Invariants: + * - next_frag is NULL at the tail of a "run"; + * - the head of a "run" has the sum of all fragment lengths in frag_run_len. + */ +struct ipfrag_skb_cb { + union { + struct inet_skb_parm h4; + struct inet6_skb_parm h6; + }; + struct sk_buff *next_frag; + int frag_run_len; +}; + +#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) + +static void fragcb_clear(struct sk_buff *skb) +{ + RB_CLEAR_NODE(&skb->rbnode); + FRAG_CB(skb)->next_frag = NULL; + FRAG_CB(skb)->frag_run_len = skb->len; +} + +/* Append skb to the last "run". */ +static void fragrun_append_to_last(struct inet_frag_queue *q, + struct sk_buff *skb) +{ + fragcb_clear(skb); + + FRAG_CB(q->last_run_head)->frag_run_len += skb->len; + FRAG_CB(q->fragments_tail)->next_frag = skb; + q->fragments_tail = skb; +} + +/* Create a new "run" with the skb. */ +static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); + fragcb_clear(skb); + + if (q->last_run_head) + rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, + &q->last_run_head->rbnode.rb_right); + else + rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); + rb_insert_color(&skb->rbnode, &q->rb_fragments); + + q->fragments_tail = skb; + q->last_run_head = skb; +} /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements * Value : 0xff if frame should be dropped. @@ -123,6 +179,28 @@ static void inet_frag_destroy_rcu(struct rcu_head *head) kmem_cache_free(f->frags_cachep, q); } +unsigned int inet_frag_rbtree_purge(struct rb_root *root) +{ + struct rb_node *p = rb_first(root); + unsigned int sum = 0; + + while (p) { + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); + + p = rb_next(p); + rb_erase(&skb->rbnode, root); + while (skb) { + struct sk_buff *next = FRAG_CB(skb)->next_frag; + + sum += skb->truesize; + kfree_skb(skb); + skb = next; + } + } + return sum; +} +EXPORT_SYMBOL(inet_frag_rbtree_purge); + void inet_frag_destroy(struct inet_frag_queue *q) { struct sk_buff *fp; @@ -224,3 +302,218 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) return fq; } EXPORT_SYMBOL(inet_frag_find); + +int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, + int offset, int end) +{ + struct sk_buff *last = q->fragments_tail; + + /* RFC5722, Section 4, amended by Errata ID : 3089 + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments) MUST be silently discarded. + * + * Duplicates, however, should be ignored (i.e. skb dropped, but the + * queue/fragments kept for later reassembly). + */ + if (!last) + fragrun_create(q, skb); /* First fragment. */ + else if (last->ip_defrag_offset + last->len < end) { + /* This is the common case: skb goes to the end. */ + /* Detect and discard overlaps. */ + if (offset < last->ip_defrag_offset + last->len) + return IPFRAG_OVERLAP; + if (offset == last->ip_defrag_offset + last->len) + fragrun_append_to_last(q, skb); + else + fragrun_create(q, skb); + } else { + /* Binary search. Note that skb can become the first fragment, + * but not the last (covered above). + */ + struct rb_node **rbn, *parent; + + rbn = &q->rb_fragments.rb_node; + do { + struct sk_buff *curr; + int curr_run_end; + + parent = *rbn; + curr = rb_to_skb(parent); + curr_run_end = curr->ip_defrag_offset + + FRAG_CB(curr)->frag_run_len; + if (end <= curr->ip_defrag_offset) + rbn = &parent->rb_left; + else if (offset >= curr_run_end) + rbn = &parent->rb_right; + else if (offset >= curr->ip_defrag_offset && + end <= curr_run_end) + return IPFRAG_DUP; + else + return IPFRAG_OVERLAP; + } while (*rbn); + /* Here we have parent properly set, and rbn pointing to + * one of its NULL left/right children. Insert skb. + */ + fragcb_clear(skb); + rb_link_node(&skb->rbnode, parent, rbn); + rb_insert_color(&skb->rbnode, &q->rb_fragments); + } + + skb->ip_defrag_offset = offset; + + return IPFRAG_OK; +} +EXPORT_SYMBOL(inet_frag_queue_insert); + +void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + struct sk_buff *parent) +{ + struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); + struct sk_buff **nextp; + int delta; + + if (head != skb) { + fp = skb_clone(skb, GFP_ATOMIC); + if (!fp) + return NULL; + FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; + if (RB_EMPTY_NODE(&skb->rbnode)) + FRAG_CB(parent)->next_frag = fp; + else + rb_replace_node(&skb->rbnode, &fp->rbnode, + &q->rb_fragments); + if (q->fragments_tail == skb) + q->fragments_tail = fp; + skb_morph(skb, head); + FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; + rb_replace_node(&head->rbnode, &skb->rbnode, + &q->rb_fragments); + consume_skb(head); + head = skb; + } + WARN_ON(head->ip_defrag_offset != 0); + + delta = -head->truesize; + + /* Head of list must not be cloned. */ + if (skb_unclone(head, GFP_ATOMIC)) + return NULL; + + delta += head->truesize; + if (delta) + add_frag_mem_limit(q->net, delta); + + /* If the first fragment is fragmented itself, we split + * it to two chunks: the first with data and paged part + * and the second, holding only fragments. + */ + if (skb_has_frag_list(head)) { + struct sk_buff *clone; + int i, plen = 0; + + clone = alloc_skb(0, GFP_ATOMIC); + if (!clone) + return NULL; + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; + skb_frag_list_init(head); + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); + clone->data_len = head->data_len - plen; + clone->len = clone->data_len; + head->truesize += clone->truesize; + clone->csum = 0; + clone->ip_summed = head->ip_summed; + add_frag_mem_limit(q->net, clone->truesize); + skb_shinfo(head)->frag_list = clone; + nextp = &clone->next; + } else { + nextp = &skb_shinfo(head)->frag_list; + } + + return nextp; +} +EXPORT_SYMBOL(inet_frag_reasm_prepare); + +void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, + void *reasm_data) +{ + struct sk_buff **nextp = (struct sk_buff **)reasm_data; + struct rb_node *rbn; + struct sk_buff *fp; + + skb_push(head, head->data - skb_network_header(head)); + + /* Traverse the tree in order, to build frag_list. */ + fp = FRAG_CB(head)->next_frag; + rbn = rb_next(&head->rbnode); + rb_erase(&head->rbnode, &q->rb_fragments); + while (rbn || fp) { + /* fp points to the next sk_buff in the current run; + * rbn points to the next run. + */ + /* Go through the current run. */ + while (fp) { + *nextp = fp; + nextp = &fp->next; + fp->prev = NULL; + memset(&fp->rbnode, 0, sizeof(fp->rbnode)); + fp->sk = NULL; + head->data_len += fp->len; + head->len += fp->len; + if (head->ip_summed != fp->ip_summed) + head->ip_summed = CHECKSUM_NONE; + else if (head->ip_summed == CHECKSUM_COMPLETE) + head->csum = csum_add(head->csum, fp->csum); + head->truesize += fp->truesize; + fp = FRAG_CB(fp)->next_frag; + } + /* Move to the next run. */ + if (rbn) { + struct rb_node *rbnext = rb_next(rbn); + + fp = rb_to_skb(rbn); + rb_erase(rbn, &q->rb_fragments); + rbn = rbnext; + } + } + sub_frag_mem_limit(q->net, head->truesize); + + *nextp = NULL; + skb_mark_not_on_list(head); + head->prev = NULL; + head->tstamp = q->stamp; +} +EXPORT_SYMBOL(inet_frag_reasm_finish); + +struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q) +{ + struct sk_buff *head; + + if (q->fragments) { + head = q->fragments; + q->fragments = head->next; + } else { + struct sk_buff *skb; + + head = skb_rb_first(&q->rb_fragments); + if (!head) + return NULL; + skb = FRAG_CB(head)->next_frag; + if (skb) + rb_replace_node(&head->rbnode, &skb->rbnode, + &q->rb_fragments); + else + rb_erase(&head->rbnode, &q->rb_fragments); + memset(&head->rbnode, 0, sizeof(head->rbnode)); + barrier(); + } + if (head == q->fragments_tail) + q->fragments_tail = NULL; + + sub_frag_mem_limit(q->net, head->truesize); + + return head; +} +EXPORT_SYMBOL(inet_frag_pull_head); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 867be8f7f1fa..486ecb0aeb87 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -57,57 +57,6 @@ */ static const char ip_frag_cache_name[] = "ip4-frags"; -/* Use skb->cb to track consecutive/adjacent fragments coming at - * the end of the queue. Nodes in the rb-tree queue will - * contain "runs" of one or more adjacent fragments. - * - * Invariants: - * - next_frag is NULL at the tail of a "run"; - * - the head of a "run" has the sum of all fragment lengths in frag_run_len. - */ -struct ipfrag_skb_cb { - struct inet_skb_parm h; - struct sk_buff *next_frag; - int frag_run_len; -}; - -#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) - -static void ip4_frag_init_run(struct sk_buff *skb) -{ - BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); - - FRAG_CB(skb)->next_frag = NULL; - FRAG_CB(skb)->frag_run_len = skb->len; -} - -/* Append skb to the last "run". */ -static void ip4_frag_append_to_last_run(struct inet_frag_queue *q, - struct sk_buff *skb) -{ - RB_CLEAR_NODE(&skb->rbnode); - FRAG_CB(skb)->next_frag = NULL; - - FRAG_CB(q->last_run_head)->frag_run_len += skb->len; - FRAG_CB(q->fragments_tail)->next_frag = skb; - q->fragments_tail = skb; -} - -/* Create a new "run" with the skb. */ -static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb) -{ - if (q->last_run_head) - rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, - &q->last_run_head->rbnode.rb_right); - else - rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); - rb_insert_color(&skb->rbnode, &q->rb_fragments); - - ip4_frag_init_run(skb); - q->fragments_tail = skb; - q->last_run_head = skb; -} - /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { struct inet_frag_queue q; @@ -212,27 +161,9 @@ static void ip_expire(struct timer_list *t) * pull the head out of the tree in order to be able to * deal with head->dev. */ - if (qp->q.fragments) { - head = qp->q.fragments; - qp->q.fragments = head->next; - } else { - head = skb_rb_first(&qp->q.rb_fragments); - if (!head) - goto out; - if (FRAG_CB(head)->next_frag) - rb_replace_node(&head->rbnode, - &FRAG_CB(head)->next_frag->rbnode, - &qp->q.rb_fragments); - else - rb_erase(&head->rbnode, &qp->q.rb_fragments); - memset(&head->rbnode, 0, sizeof(head->rbnode)); - barrier(); - } - if (head == qp->q.fragments_tail) - qp->q.fragments_tail = NULL; - - sub_frag_mem_limit(qp->q.net, head->truesize); - + head = inet_frag_pull_head(&qp->q); + if (!head) + goto out; head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out; @@ -344,12 +275,10 @@ static int ip_frag_reinit(struct ipq *qp) static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { struct net *net = container_of(qp->q.net, struct net, ipv4.frags); - struct rb_node **rbn, *parent; - struct sk_buff *skb1, *prev_tail; - int ihl, end, skb1_run_end; + int ihl, end, flags, offset; + struct sk_buff *prev_tail; struct net_device *dev; unsigned int fragsize; - int flags, offset; int err = -ENOENT; u8 ecn; @@ -413,62 +342,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) /* Makes sure compiler wont do silly aliasing games */ barrier(); - /* RFC5722, Section 4, amended by Errata ID : 3089 - * When reassembling an IPv6 datagram, if - * one or more its constituent fragments is determined to be an - * overlapping fragment, the entire datagram (and any constituent - * fragments) MUST be silently discarded. - * - * We do the same here for IPv4 (and increment an snmp counter) but - * we do not want to drop the whole queue in response to a duplicate - * fragment. - */ - - err = -EINVAL; - /* Find out where to put this fragment. */ prev_tail = qp->q.fragments_tail; - if (!prev_tail) - ip4_frag_create_run(&qp->q, skb); /* First fragment. */ - else if (prev_tail->ip_defrag_offset + prev_tail->len < end) { - /* This is the common case: skb goes to the end. */ - /* Detect and discard overlaps. */ - if (offset < prev_tail->ip_defrag_offset + prev_tail->len) - goto overlap; - if (offset == prev_tail->ip_defrag_offset + prev_tail->len) - ip4_frag_append_to_last_run(&qp->q, skb); - else - ip4_frag_create_run(&qp->q, skb); - } else { - /* Binary search. Note that skb can become the first fragment, - * but not the last (covered above). - */ - rbn = &qp->q.rb_fragments.rb_node; - do { - parent = *rbn; - skb1 = rb_to_skb(parent); - skb1_run_end = skb1->ip_defrag_offset + - FRAG_CB(skb1)->frag_run_len; - if (end <= skb1->ip_defrag_offset) - rbn = &parent->rb_left; - else if (offset >= skb1_run_end) - rbn = &parent->rb_right; - else if (offset >= skb1->ip_defrag_offset && - end <= skb1_run_end) - goto err; /* No new data, potential duplicate */ - else - goto overlap; /* Found an overlap */ - } while (*rbn); - /* Here we have parent properly set, and rbn pointing to - * one of its NULL left/right children. Insert skb. - */ - ip4_frag_init_run(skb); - rb_link_node(&skb->rbnode, parent, rbn); - rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); - } + err = inet_frag_queue_insert(&qp->q, skb, offset, end); + if (err) + goto insert_error; if (dev) qp->iif = dev->ifindex; - skb->ip_defrag_offset = offset; qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; @@ -501,10 +381,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) skb_dst_drop(skb); return -EINPROGRESS; -overlap: +insert_error: + if (err == IPFRAG_DUP) { + kfree_skb(skb); + return -EINVAL; + } + err = -EINVAL; __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); discard_qp: inet_frag_kill(&qp->q); + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); err: kfree_skb(skb); return err; @@ -516,13 +402,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, { struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct iphdr *iph; - struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments); - struct sk_buff **nextp; /* To build frag_list. */ - struct rb_node *rbn; - int len; - int ihlen; - int delta; - int err; + void *reasm_data; + int len, err; u8 ecn; ipq_kill(qp); @@ -532,117 +413,23 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, err = -EINVAL; goto out_fail; } + /* Make the one we just received the head. */ - if (head != skb) { - fp = skb_clone(skb, GFP_ATOMIC); - if (!fp) - goto out_nomem; - FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; - if (RB_EMPTY_NODE(&skb->rbnode)) - FRAG_CB(prev_tail)->next_frag = fp; - else - rb_replace_node(&skb->rbnode, &fp->rbnode, - &qp->q.rb_fragments); - if (qp->q.fragments_tail == skb) - qp->q.fragments_tail = fp; - skb_morph(skb, head); - FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; - rb_replace_node(&head->rbnode, &skb->rbnode, - &qp->q.rb_fragments); - consume_skb(head); - head = skb; - } - - WARN_ON(head->ip_defrag_offset != 0); - - /* Allocate a new buffer for the datagram. */ - ihlen = ip_hdrlen(head); - len = ihlen + qp->q.len; + reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail); + if (!reasm_data) + goto out_nomem; + len = ip_hdrlen(skb) + qp->q.len; err = -E2BIG; if (len > 65535) goto out_oversize; - delta = - head->truesize; + inet_frag_reasm_finish(&qp->q, skb, reasm_data); - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - goto out_nomem; + skb->dev = dev; + IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size); - delta += head->truesize; - if (delta) - add_frag_mem_limit(qp->q.net, delta); - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (!clone) - goto out_nomem; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = clone->data_len = head->data_len - plen; - head->truesize += clone->truesize; - clone->csum = 0; - clone->ip_summed = head->ip_summed; - add_frag_mem_limit(qp->q.net, clone->truesize); - skb_shinfo(head)->frag_list = clone; - nextp = &clone->next; - } else { - nextp = &skb_shinfo(head)->frag_list; - } - - skb_push(head, head->data - skb_network_header(head)); - - /* Traverse the tree in order, to build frag_list. */ - fp = FRAG_CB(head)->next_frag; - rbn = rb_next(&head->rbnode); - rb_erase(&head->rbnode, &qp->q.rb_fragments); - while (rbn || fp) { - /* fp points to the next sk_buff in the current run; - * rbn points to the next run. - */ - /* Go through the current run. */ - while (fp) { - *nextp = fp; - nextp = &fp->next; - fp->prev = NULL; - memset(&fp->rbnode, 0, sizeof(fp->rbnode)); - fp->sk = NULL; - head->data_len += fp->len; - head->len += fp->len; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; - fp = FRAG_CB(fp)->next_frag; - } - /* Move to the next run. */ - if (rbn) { - struct rb_node *rbnext = rb_next(rbn); - - fp = rb_to_skb(rbn); - rb_erase(rbn, &qp->q.rb_fragments); - rbn = rbnext; - } - } - sub_frag_mem_limit(qp->q.net, head->truesize); - - *nextp = NULL; - skb_mark_not_on_list(head); - head->prev = NULL; - head->dev = dev; - head->tstamp = qp->q.stamp; - IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size); - - iph = ip_hdr(head); + iph = ip_hdr(skb); iph->tot_len = htons(len); iph->tos |= ecn; @@ -655,7 +442,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, * from one very small df-fragment and one large non-df frag. */ if (qp->max_df_size == qp->q.max_size) { - IPCB(head)->flags |= IPSKB_FRAG_PMTU; + IPCB(skb)->flags |= IPSKB_FRAG_PMTU; iph->frag_off = htons(IP_DF); } else { iph->frag_off = 0; @@ -753,28 +540,6 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) } EXPORT_SYMBOL(ip_check_defrag); -unsigned int inet_frag_rbtree_purge(struct rb_root *root) -{ - struct rb_node *p = rb_first(root); - unsigned int sum = 0; - - while (p) { - struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); - - p = rb_next(p); - rb_erase(&skb->rbnode, root); - while (skb) { - struct sk_buff *next = FRAG_CB(skb)->next_frag; - - sum += skb->truesize; - kfree_skb(skb); - skb = next; - } - } - return sum; -} -EXPORT_SYMBOL(inet_frag_rbtree_purge); - #ifdef CONFIG_SYSCTL static int dist_min; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d1d09f3e5f9e..d1cef66820d3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -268,20 +268,11 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, int len; itn = net_generic(net, erspan_net_id); - len = gre_hdr_len + sizeof(*ershdr); - - /* Check based hdr len */ - if (unlikely(!pskb_may_pull(skb, len))) - return PACKET_REJECT; iph = ip_hdr(skb); ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); ver = ershdr->ver; - /* The original GRE header does not have key field, - * Use ERSPAN 10-bit session ID as key. - */ - tpi->key = cpu_to_be32(get_session_id(ershdr)); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags | TUNNEL_KEY, iph->saddr, iph->daddr, tpi->key); @@ -458,81 +449,14 @@ static int gre_handle_offloads(struct sk_buff *skb, bool csum) return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } -static struct rtable *gre_get_rt(struct sk_buff *skb, - struct net_device *dev, - struct flowi4 *fl, - const struct ip_tunnel_key *key) -{ - struct net *net = dev_net(dev); - - memset(fl, 0, sizeof(*fl)); - fl->daddr = key->u.ipv4.dst; - fl->saddr = key->u.ipv4.src; - fl->flowi4_tos = RT_TOS(key->tos); - fl->flowi4_mark = skb->mark; - fl->flowi4_proto = IPPROTO_GRE; - - return ip_route_output_key(net, fl); -} - -static struct rtable *prepare_fb_xmit(struct sk_buff *skb, - struct net_device *dev, - struct flowi4 *fl, - int tunnel_hlen) -{ - struct ip_tunnel_info *tun_info; - const struct ip_tunnel_key *key; - struct rtable *rt = NULL; - int min_headroom; - bool use_cache; - int err; - - tun_info = skb_tunnel_info(skb); - key = &tun_info->key; - use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); - - if (use_cache) - rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr); - if (!rt) { - rt = gre_get_rt(skb, dev, fl, key); - if (IS_ERR(rt)) - goto err_free_skb; - if (use_cache) - dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, - fl->saddr); - } - - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len - + tunnel_hlen + sizeof(struct iphdr); - if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { - int head_delta = SKB_DATA_ALIGN(min_headroom - - skb_headroom(skb) + - 16); - err = pskb_expand_head(skb, max_t(int, head_delta, 0), - 0, GFP_ATOMIC); - if (unlikely(err)) - goto err_free_rt; - } - return rt; - -err_free_rt: - ip_rt_put(rt); -err_free_skb: - kfree_skb(skb); - dev->stats.tx_dropped++; - return NULL; -} - static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; - struct rtable *rt = NULL; - struct flowi4 fl; int tunnel_hlen; - __be16 df, flags; + __be16 flags; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || @@ -542,13 +466,12 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, key = &tun_info->key; tunnel_hlen = gre_calc_hlen(key->tun_flags); - rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); - if (!rt) - return; + if (skb_cow_head(skb, dev->needed_headroom)) + goto err_free_skb; /* Push Tunnel header. */ if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) - goto err_free_rt; + goto err_free_skb; flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); @@ -556,32 +479,25 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, tunnel_id_to_key32(tun_info->key.tun_id), (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); - df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); - iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, - key->tos, key->ttl, df, false); return; -err_free_rt: - ip_rt_put(rt); err_free_skb: kfree_skb(skb); dev->stats.tx_dropped++; } -static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, - __be16 proto) +static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; struct erspan_metadata *md; - struct rtable *rt = NULL; bool truncate = false; - struct flowi4 fl; + __be16 proto; int tunnel_hlen; int version; - __be16 df; int nhoff; int thoff; @@ -592,21 +508,20 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, key = &tun_info->key; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) - goto err_free_rt; + goto err_free_skb; md = ip_tunnel_info_opts(tun_info); if (!md) - goto err_free_rt; + goto err_free_skb; /* ERSPAN has fixed 8 byte GRE header */ version = md->version; tunnel_hlen = 8 + erspan_hdr_len(version); - rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); - if (!rt) - return; + if (skb_cow_head(skb, dev->needed_headroom)) + goto err_free_skb; if (gre_handle_offloads(skb, false)) - goto err_free_rt; + goto err_free_skb; if (skb->len > dev->mtu + dev->hard_header_len) { pskb_trim(skb, dev->mtu + dev->hard_header_len); @@ -626,27 +541,25 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, if (version == 1) { erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), ntohl(md->u.index), truncate, true); + proto = htons(ETH_P_ERSPAN); } else if (version == 2) { erspan_build_header_v2(skb, ntohl(tunnel_id_to_key32(key->tun_id)), md->u.md2.dir, get_hwid(&md->u.md2), truncate, true); + proto = htons(ETH_P_ERSPAN2); } else { - goto err_free_rt; + goto err_free_skb; } gre_build_header(skb, 8, TUNNEL_SEQ, - htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++)); + proto, 0, htonl(tunnel->o_seqno++)); - df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); - iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, - key->tos, key->ttl, df, false); return; -err_free_rt: - ip_rt_put(rt); err_free_skb: kfree_skb(skb); dev->stats.tx_dropped++; @@ -655,13 +568,18 @@ err_free_skb: static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); + const struct ip_tunnel_key *key; struct rtable *rt; struct flowi4 fl4; if (ip_tunnel_info_af(info) != AF_INET) return -EINVAL; - rt = gre_get_rt(skb, dev, &fl4, &info->key); + key = &info->key; + ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, + tunnel_id_to_key32(key->tun_id), key->tos, 0, + skb->mark); + rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -721,12 +639,13 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, { struct ip_tunnel *tunnel = netdev_priv(dev); bool truncate = false; + __be16 proto; if (!pskb_inet_may_pull(skb)) goto free_skb; if (tunnel->collect_md) { - erspan_fb_xmit(skb, dev, skb->protocol); + erspan_fb_xmit(skb, dev); return NETDEV_TX_OK; } @@ -742,19 +661,22 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, } /* Push ERSPAN header */ - if (tunnel->erspan_ver == 1) + if (tunnel->erspan_ver == 1) { erspan_build_header(skb, ntohl(tunnel->parms.o_key), tunnel->index, truncate, true); - else if (tunnel->erspan_ver == 2) + proto = htons(ETH_P_ERSPAN); + } else if (tunnel->erspan_ver == 2) { erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), tunnel->dir, tunnel->hwid, truncate, true); - else + proto = htons(ETH_P_ERSPAN2); + } else { goto free_skb; + } tunnel->parms.o_flags &= ~TUNNEL_KEY; - __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); + __gre_xmit(skb, dev, &tunnel->parms.iph, proto); return NETDEV_TX_OK; free_skb: diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 26921f6b3b92..cd6b5694f99e 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -429,7 +429,6 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) if (skb->pkt_type == PACKET_OTHERHOST) goto drop; - __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); skb = skb_share_check(skb, GFP_ATOMIC); @@ -488,6 +487,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) goto drop; } + iph = ip_hdr(skb); skb->transport_header = skb->network_header + iph->ihl*4; /* Remove any debris in the socket control block */ @@ -520,6 +520,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, skb = ip_rcv_core(skb, net); if (skb == NULL) return NET_RX_DROP; + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL, skb, dev, NULL, ip_rcv_finish); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index c4f5602308ed..893f013d5369 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -501,15 +501,19 @@ EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, struct rtable *rt, __be16 df, - const struct iphdr *inner_iph) + const struct iphdr *inner_iph, + int tunnel_hlen, __be32 dst, bool md) { struct ip_tunnel *tunnel = netdev_priv(dev); - int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; + int pkt_size; int mtu; + tunnel_hlen = md ? tunnel_hlen : tunnel->hlen; + pkt_size = skb->len - tunnel_hlen - dev->hard_header_len; + if (df) mtu = dst_mtu(&rt->dst) - dev->hard_header_len - - sizeof(struct iphdr) - tunnel->hlen; + - sizeof(struct iphdr) - tunnel_hlen; else mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; @@ -527,11 +531,13 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, #if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + __be32 daddr; + + daddr = md ? dst : tunnel->parms.iph.daddr; if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) { - if ((tunnel->parms.iph.daddr && - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || + if ((daddr && !ipv4_is_multicast(daddr)) || rt6->rt6i_dst.plen == 128) { rt6->rt6i_flags |= RTF_MODIFIED; dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); @@ -548,17 +554,19 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } -void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) +void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + u8 proto, int tunnel_hlen) { struct ip_tunnel *tunnel = netdev_priv(dev); u32 headroom = sizeof(struct iphdr); struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; const struct iphdr *inner_iph; - struct rtable *rt; + struct rtable *rt = NULL; struct flowi4 fl4; __be16 df = 0; u8 tos, ttl; + bool use_cache; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || @@ -574,20 +582,39 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) else if (skb->protocol == htons(ETH_P_IPV6)) tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); } - ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0, - RT_TOS(tos), tunnel->parms.link, tunnel->fwmark); + ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, + tunnel_id_to_key32(key->tun_id), RT_TOS(tos), + 0, skb->mark); if (tunnel->encap.type != TUNNEL_ENCAP_NONE) goto tx_error; - rt = ip_route_output_key(tunnel->net, &fl4); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error; + + use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); + if (use_cache) + rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr); + if (!rt) { + rt = ip_route_output_key(tunnel->net, &fl4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (use_cache) + dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, + fl4.saddr); } if (rt->dst.dev == dev) { ip_rt_put(rt); dev->stats.collisions++; goto tx_error; } + + if (key->tun_flags & TUNNEL_DONT_FRAGMENT) + df = htons(IP_DF); + if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen, + key->u.ipv4.dst, true)) { + ip_rt_put(rt); + goto tx_error; + } + tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); ttl = key->ttl; if (ttl == 0) { @@ -598,10 +625,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) else ttl = ip4_dst_hoplimit(&rt->dst); } - if (key->tun_flags & TUNNEL_DONT_FRAGMENT) - df = htons(IP_DF); - else if (skb->protocol == htons(ETH_P_IP)) + + if (!df && skb->protocol == htons(ETH_P_IP)) df = inner_iph->frag_off & htons(IP_DF); + headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; if (headroom > dev->needed_headroom) dev->needed_headroom = headroom; @@ -644,13 +671,19 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ + struct ip_tunnel_info *tun_info; if (!skb_dst(skb)) { dev->stats.tx_fifo_errors++; goto tx_error; } - if (skb->protocol == htons(ETH_P_IP)) { + tun_info = skb_tunnel_info(skb); + if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) && + ip_tunnel_info_af(tun_info) == AF_INET && + tun_info->key.u.ipv4.dst) + dst = tun_info->key.u.ipv4.dst; + else if (skb->protocol == htons(ETH_P_IP)) { rt = skb_rtable(skb); dst = rt_nexthop(rt, inner_iph->daddr); } @@ -731,7 +764,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph, + 0, 0, false)) { ip_rt_put(rt); goto tx_error; } diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index d7b43e700023..68a21bf75dd0 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -74,6 +74,33 @@ drop: return 0; } +static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + struct ip_tunnel *tunnel; + const struct iphdr *iph = ip_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; + + skb->dev = tunnel->dev; + + return xfrm_input(skb, nexthdr, spi, encap_type); + } + + return -EINVAL; +drop: + kfree_skb(skb); + return 0; +} + static int vti_rcv(struct sk_buff *skb) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; @@ -82,6 +109,14 @@ static int vti_rcv(struct sk_buff *skb) return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); } +static int vti_rcv_ipip(struct sk_buff *skb) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0); +} + static int vti_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -435,6 +470,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { .priority = 100, }; +static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = vti_rcv_ipip, + .err_handler = vti4_err, + .priority = 0, +}; + static int __net_init vti_init_net(struct net *net) { int err; @@ -603,6 +644,13 @@ static int __init vti_init(void) if (err < 0) goto xfrm_proto_comp_failed; + msg = "ipip tunnel"; + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: cant't register tunnel\n",__func__); + goto xfrm_tunnel_failed; + } + msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); if (err < 0) @@ -612,6 +660,8 @@ static int __init vti_init(void) rtnl_link_failed: xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); +xfrm_tunnel_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 57c5dd283a2c..fe10b9a2efc8 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -302,7 +302,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, ipproto); if (tunnel->collect_md) - ip_md_tunnel_xmit(skb, dev, ipproto); + ip_md_tunnel_xmit(skb, dev, ipproto, 0); else ip_tunnel_xmit(skb, dev, tiph, ipproto); return NETDEV_TX_OK; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ddbf8c9a1abb..fb99002c3d4e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2467,6 +2467,61 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS); } +static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || + (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || + rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol || + rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) { + NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + if (err) + return err; + + if ((tb[RTA_SRC] && !rtm->rtm_src_len) || + (tb[RTA_DST] && !rtm->rtm_dst_len)) { + NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_SRC: + case RTA_DST: + case RTA_TABLE: + break; + default: + NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request"); + return -EINVAL; + } + } + + return 0; +} + static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -2475,18 +2530,14 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct sk_buff *skb = NULL; struct mfc_cache *cache; struct mr_table *mrt; - struct rtmsg *rtm; __be32 src, grp; u32 tableid; int err; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; - rtm = nlmsg_data(nlh); - src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 8d2e5dc9a827..a058213b77a7 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -80,24 +80,6 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t } EXPORT_SYMBOL(ip_route_me_harder); -int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) -{ - const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); - - if (entry->state.hook == NF_INET_LOCAL_OUT) { - const struct iphdr *iph = ip_hdr(skb); - - if (!(iph->tos == rt_info->tos && - skb->mark == rt_info->mark && - iph->daddr == rt_info->daddr && - iph->saddr == rt_info->saddr)) - return ip_route_me_harder(entry->state.net, skb, - RTN_UNSPEC); - } - return 0; -} -EXPORT_SYMBOL_GPL(nf_ip_reroute); - int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict __always_unused) { diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index b61977db9b7f..2a909e5f9ba0 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -846,9 +846,9 @@ static int clusterip_net_init(struct net *net) static void clusterip_net_exit(struct net *net) { +#ifdef CONFIG_PROC_FS struct clusterip_net *cn = clusterip_pernet(net); -#ifdef CONFIG_PROC_FS mutex_lock(&cn->mutex); proc_remove(cn->procdir); cn->procdir = NULL; diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 2687db015b6f..e26165af45cb 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -214,7 +214,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, } /* Change outer to look like the reply to an incoming packet */ - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip)) return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ce92f73cf104..99be68b15da0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2763,6 +2763,75 @@ static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, return skb; } +static int inet_rtm_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG(extack, + "ipv4: Invalid header for route get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || + (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || + rtm->rtm_table || rtm->rtm_protocol || + rtm->rtm_scope || rtm->rtm_type) { + NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request"); + return -EINVAL; + } + + if (rtm->rtm_flags & ~(RTM_F_NOTIFY | + RTM_F_LOOKUP_TABLE | + RTM_F_FIB_MATCH)) { + NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + if (err) + return err; + + if ((tb[RTA_SRC] && !rtm->rtm_src_len) || + (tb[RTA_DST] && !rtm->rtm_dst_len)) { + NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_IIF: + case RTA_OIF: + case RTA_SRC: + case RTA_DST: + case RTA_IP_PROTO: + case RTA_SPORT: + case RTA_DPORT: + case RTA_MARK: + case RTA_UID: + break; + default: + NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request"); + return -EINVAL; + } + } + + return 0; +} + static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -2783,8 +2852,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err; int mark; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, - extack); + err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) return err; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 541bdb9f81d7..6f8d292ad501 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1127,7 +1127,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp) } static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, - int *copied, size_t size) + int *copied, size_t size, + struct ubuf_info *uarg) { struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -1147,6 +1148,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, return -ENOBUFS; tp->fastopen_req->data = msg; tp->fastopen_req->size = size; + tp->fastopen_req->uarg = uarg; if (inet->defer_connect) { err = tcp_connect(sk); @@ -1186,11 +1188,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) flags = msg->msg_flags; if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) { - if (sk->sk_state != TCP_ESTABLISHED) { - err = -EINVAL; - goto out_err; - } - skb = tcp_write_queue_tail(sk); uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb)); if (!uarg) { @@ -1205,7 +1202,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) && !tp->repair) { - err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); + err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size, uarg); if (err == -EINPROGRESS && copied_syn > 0) goto out; else if (err) @@ -1554,7 +1551,7 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied) (copied > 0 && ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && - !icsk->icsk_ack.pingpong)) && + !inet_csk_in_pingpong_mode(sk))) && !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = true; } @@ -2987,16 +2984,16 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_QUICKACK: if (!val) { - icsk->icsk_ack.pingpong = 1; + inet_csk_enter_pingpong_mode(sk); } else { - icsk->icsk_ack.pingpong = 0; + inet_csk_exit_pingpong_mode(sk); if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && inet_csk_ack_scheduled(sk)) { icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; tcp_cleanup_rbuf(sk, 1); if (!(val & 1)) - icsk->icsk_ack.pingpong = 1; + inet_csk_enter_pingpong_mode(sk); } } break; @@ -3410,7 +3407,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return 0; } case TCP_QUICKACK: - val = !icsk->icsk_ack.pingpong; + val = !inet_csk_in_pingpong_mode(sk); break; case TCP_CONGESTION: diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 0f497fc49c3f..56be7d27f208 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -115,6 +115,14 @@ struct bbr { unused_b:5; u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ u32 full_bw; /* recent bw, to estimate if pipe is full */ + + /* For tracking ACK aggregation: */ + u64 ack_epoch_mstamp; /* start of ACK sampling epoch */ + u16 extra_acked[2]; /* max excess data ACKed in epoch */ + u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ + extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ + extra_acked_win_idx:1, /* current index in extra_acked array */ + unused_c:6; }; #define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ @@ -182,6 +190,15 @@ static const u32 bbr_lt_bw_diff = 4000 / 8; /* If we estimate we're policed, use lt_bw for this many round trips: */ static const u32 bbr_lt_bw_max_rtts = 48; +/* Gain factor for adding extra_acked to target cwnd: */ +static const int bbr_extra_acked_gain = BBR_UNIT; +/* Window length of extra_acked window. */ +static const u32 bbr_extra_acked_win_rtts = 5; +/* Max allowed val for ack_epoch_acked, after which sampling epoch is reset */ +static const u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20; +/* Time period for clamping cwnd increment due to ack aggregation */ +static const u32 bbr_extra_acked_max_us = 100 * 1000; + static void bbr_check_probe_rtt_done(struct sock *sk); /* Do we estimate that STARTUP filled the pipe? */ @@ -208,6 +225,16 @@ static u32 bbr_bw(const struct sock *sk) return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); } +/* Return maximum extra acked in past k-2k round trips, + * where k = bbr_extra_acked_win_rtts. + */ +static u16 bbr_extra_acked(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return max(bbr->extra_acked[0], bbr->extra_acked[1]); +} + /* Return rate in bytes per second, optionally with a gain. * The order here is chosen carefully to avoid overflow of u64. This should * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. @@ -305,6 +332,8 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) if (event == CA_EVENT_TX_START && tp->app_limited) { bbr->idle_restart = 1; + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; /* Avoid pointless buffer overflows: pace at est. bw if we don't * need more speed (we're restarting from idle and app-limited). */ @@ -315,30 +344,19 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) } } -/* Find target cwnd. Right-size the cwnd based on min RTT and the - * estimated bottleneck bandwidth: +/* Calculate bdp based on min RTT and the estimated bottleneck bandwidth: * - * cwnd = bw * min_rtt * gain = BDP * gain + * bdp = bw * min_rtt * gain * * The key factor, gain, controls the amount of queue. While a small gain * builds a smaller queue, it becomes more vulnerable to noise in RTT * measurements (e.g., delayed ACKs or other ACK compression effects). This * noise may cause BBR to under-estimate the rate. - * - * To achieve full performance in high-speed paths, we budget enough cwnd to - * fit full-sized skbs in-flight on both end hosts to fully utilize the path: - * - one skb in sending host Qdisc, - * - one skb in sending host TSO/GSO engine - * - one skb being received by receiver host LRO/GRO/delayed-ACK engine - * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because - * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, - * which allows 2 outstanding 2-packet sequences, to try to keep pipe - * full even with ACK-every-other-packet delayed ACKs. */ -static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) +static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) { struct bbr *bbr = inet_csk_ca(sk); - u32 cwnd; + u32 bdp; u64 w; /* If we've never had a valid RTT sample, cap cwnd at the initial @@ -353,7 +371,24 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) w = (u64)bw * bbr->min_rtt_us; /* Apply a gain to the given value, then remove the BW_SCALE shift. */ - cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + + return bdp; +} + +/* To achieve full performance in high-speed paths, we budget enough cwnd to + * fit full-sized skbs in-flight on both end hosts to fully utilize the path: + * - one skb in sending host Qdisc, + * - one skb in sending host TSO/GSO engine + * - one skb being received by receiver host LRO/GRO/delayed-ACK engine + * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because + * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, + * which allows 2 outstanding 2-packet sequences, to try to keep pipe + * full even with ACK-every-other-packet delayed ACKs. + */ +static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); /* Allow enough full-sized skbs in flight to utilize end systems. */ cwnd += 3 * bbr_tso_segs_goal(sk); @@ -368,6 +403,17 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) return cwnd; } +/* Find inflight based on min RTT and the estimated bottleneck bandwidth. */ +static u32 bbr_inflight(struct sock *sk, u32 bw, int gain) +{ + u32 inflight; + + inflight = bbr_bdp(sk, bw, gain); + inflight = bbr_quantization_budget(sk, inflight, gain); + + return inflight; +} + /* With pacing at lower layers, there's often less data "in the network" than * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq), * we often have several skbs queued in the pacing layer with a pre-scheduled @@ -401,6 +447,22 @@ static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now) return inflight_at_edt - interval_delivered; } +/* Find the cwnd increment based on estimate of ack aggregation */ +static u32 bbr_ack_aggregation_cwnd(struct sock *sk) +{ + u32 max_aggr_cwnd, aggr_cwnd = 0; + + if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) { + max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) + / BW_UNIT; + aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk)) + >> BBR_SCALE; + aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); + } + + return aggr_cwnd; +} + /* An optimization in BBR to reduce losses: On the first round of recovery, we * follow the packet conservation principle: send P packets per P packets acked. * After that, we slow-start and send at most 2*P packets per P packets acked. @@ -461,8 +523,15 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) goto done; + target_cwnd = bbr_bdp(sk, bw, gain); + + /* Increment the cwnd to account for excess ACKed data that seems + * due to aggregation (of data and/or ACKs) visible in the ACK stream. + */ + target_cwnd += bbr_ack_aggregation_cwnd(sk); + target_cwnd = bbr_quantization_budget(sk, target_cwnd, gain); + /* If we're below target cwnd, slow start cwnd toward target cwnd. */ - target_cwnd = bbr_target_cwnd(sk, bw, gain); if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ cwnd = min(cwnd + acked, target_cwnd); else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) @@ -503,14 +572,14 @@ static bool bbr_is_next_cycle_phase(struct sock *sk, if (bbr->pacing_gain > BBR_UNIT) return is_full_length && (rs->losses || /* perhaps pacing_gain*BDP won't fit */ - inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain)); + inflight >= bbr_inflight(sk, bw, bbr->pacing_gain)); /* A pacing_gain < 1.0 tries to drain extra queue we added if bw * probing didn't find more bw. If inflight falls to match BDP then we * estimate queue is drained; persisting would underutilize the pipe. */ return is_full_length || - inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT); + inflight <= bbr_inflight(sk, bw, BBR_UNIT); } static void bbr_advance_cycle_phase(struct sock *sk) @@ -727,6 +796,67 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) } } +/* Estimates the windowed max degree of ack aggregation. + * This is used to provision extra in-flight data to keep sending during + * inter-ACK silences. + * + * Degree of ack aggregation is estimated as extra data acked beyond expected. + * + * max_extra_acked = "maximum recent excess data ACKed beyond max_bw * interval" + * cwnd += max_extra_acked + * + * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). + * Max filter is an approximate sliding window of 5-10 (packet timed) round + * trips. + */ +static void bbr_update_ack_aggregation(struct sock *sk, + const struct rate_sample *rs) +{ + u32 epoch_us, expected_acked, extra_acked; + struct bbr *bbr = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + + if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 || + rs->delivered < 0 || rs->interval_us <= 0) + return; + + if (bbr->round_start) { + bbr->extra_acked_win_rtts = min(0x1F, + bbr->extra_acked_win_rtts + 1); + if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) { + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? + 0 : 1; + bbr->extra_acked[bbr->extra_acked_win_idx] = 0; + } + } + + /* Compute how many packets we expected to be delivered over epoch. */ + epoch_us = tcp_stamp_us_delta(tp->delivered_mstamp, + bbr->ack_epoch_mstamp); + expected_acked = ((u64)bbr_bw(sk) * epoch_us) / BW_UNIT; + + /* Reset the aggregation epoch if ACK rate is below expected rate or + * significantly large no. of ack received since epoch (potentially + * quite old epoch). + */ + if (bbr->ack_epoch_acked <= expected_acked || + (bbr->ack_epoch_acked + rs->acked_sacked >= + bbr_ack_epoch_acked_reset_thresh)) { + bbr->ack_epoch_acked = 0; + bbr->ack_epoch_mstamp = tp->delivered_mstamp; + expected_acked = 0; + } + + /* Compute excess data delivered, beyond what was expected. */ + bbr->ack_epoch_acked = min_t(u32, 0xFFFFF, + bbr->ack_epoch_acked + rs->acked_sacked); + extra_acked = bbr->ack_epoch_acked - expected_acked; + extra_acked = min(extra_acked, tp->snd_cwnd); + if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx]) + bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; +} + /* Estimate when the pipe is full, using the change in delivery rate: BBR * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited @@ -762,11 +892,11 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { bbr->mode = BBR_DRAIN; /* drain queue we created */ tcp_sk(sk)->snd_ssthresh = - bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT); + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); } /* fall through to check if in-flight is already small: */ if (bbr->mode == BBR_DRAIN && bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= - bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT)) + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ } @@ -881,6 +1011,7 @@ static void bbr_update_gains(struct sock *sk) static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) { bbr_update_bw(sk, rs); + bbr_update_ack_aggregation(sk, rs); bbr_update_cycle_phase(sk, rs); bbr_check_full_bw_reached(sk, rs); bbr_check_drain(sk, rs); @@ -932,6 +1063,13 @@ static void bbr_init(struct sock *sk) bbr_reset_lt_bw_sampling(sk); bbr_reset_startup_mode(sk); + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = 0; + bbr->extra_acked[0] = 0; + bbr->extra_acked[1] = 0; + cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 76858b14ebe9..7a027dec649b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -221,7 +221,7 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) struct inet_connection_sock *icsk = inet_csk(sk); tcp_incr_quickack(sk, max_quickacks); - icsk->icsk_ack.pingpong = 0; + inet_csk_exit_pingpong_mode(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; } EXPORT_SYMBOL(tcp_enter_quickack_mode); @@ -236,7 +236,7 @@ static bool tcp_in_quickack_mode(struct sock *sk) const struct dst_entry *dst = __sk_dst_get(sk); return (dst && dst_metric(dst, RTAX_QUICKACK)) || - (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong); + (icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk)); } static void tcp_ecn_queue_cwr(struct tcp_sock *tp) @@ -4094,7 +4094,7 @@ void tcp_fin(struct sock *sk) case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); - inet_csk(sk)->icsk_ack.pingpong = 1; + inet_csk_enter_pingpong_mode(sk); break; case TCP_CLOSE_WAIT: @@ -5889,7 +5889,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, return -1; if (sk->sk_write_pending || icsk->icsk_accept_queue.rskq_defer_accept || - icsk->icsk_ack.pingpong) { + inet_csk_in_pingpong_mode(sk)) { /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. * diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index efc6fef692ff..662b034f1795 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2437,7 +2437,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) refcount_read(&sk->sk_refcnt), sk, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), - (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk), tp->snd_cwnd, state == TCP_LISTEN ? fastopenq->max_qlen : diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6527f61f59ff..96bdb8eae9bb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -165,13 +165,16 @@ static void tcp_event_data_sent(struct tcp_sock *tp, if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); - tp->lsndtime = now; - - /* If it is a reply for ato after last received - * packet, enter pingpong mode. + /* If this is the first data packet sent in response to the + * previous received data, + * and it is a reply for ato after last received packet, + * increase pingpong count. */ - if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - icsk->icsk_ack.pingpong = 1; + if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) && + (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) + inet_csk_inc_pingpong_cnt(sk); + + tp->lsndtime = now; } /* Account for an ACK we sent. */ @@ -3455,6 +3458,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) skb_trim(syn_data, copied); space = copied; } + skb_zcopy_set(syn_data, fo->uarg, NULL); } /* No more data pending in inet_wait_for_connect() */ if (space == fo->size) @@ -3568,7 +3572,7 @@ void tcp_send_delayed_ack(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); int max_ato = HZ / 2; - if (icsk->icsk_ack.pingpong || + if (inet_csk_in_pingpong_mode(sk) || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) max_ato = TCP_DELACK_MAX; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d7399a89469d..f0c86398e6a7 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -277,14 +277,14 @@ void tcp_delack_timer_handler(struct sock *sk) icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; if (inet_csk_ack_scheduled(sk)) { - if (!icsk->icsk_ack.pingpong) { + if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. */ - icsk->icsk_ack.pingpong = 0; + inet_csk_exit_pingpong_mode(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; } tcp_mstamp_refresh(tcp_sk(sk)); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3fb0ed5e4789..5c3cd5d84a6f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -847,15 +847,23 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) + if (hlen + cork->gso_size > cork->fragsize) { + kfree_skb(skb); return -EINVAL; - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + } + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + kfree_skb(skb); return -EINVAL; - if (sk->sk_no_check_tx) + } + if (sk->sk_no_check_tx) { + kfree_skb(skb); return -EINVAL; + } if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) + dst_xfrm(skb_dst(skb))) { + kfree_skb(skb); return -EIO; + } skb_shinfo(skb)->gso_size = cork->gso_size; skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; @@ -1918,7 +1926,7 @@ void udp_lib_rehash(struct sock *sk, u16 newhash) } EXPORT_SYMBOL(udp_lib_rehash); -static void udp_v4_rehash(struct sock *sk) +void udp_v4_rehash(struct sock *sk) { u16 new_hash = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 322672655419..6b2fa77eeb1c 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -10,6 +10,7 @@ int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int); int __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); int udp_v4_get_port(struct sock *sk, unsigned short snum); +void udp_v4_rehash(struct sock *sk); int udp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 39c7f17d916f..3c94b8f0ff27 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -53,6 +53,7 @@ struct proto udplite_prot = { .sendpage = udp_sendpage, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 93d5ad2b1a69..dcb1d434f7da 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -597,6 +597,43 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) }, }; +static int inet6_netconf_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv6_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_ipv6_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETCONFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETCONFA_IFINDEX: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) @@ -605,14 +642,12 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, struct nlattr *tb[NETCONFA_MAX+1]; struct inet6_dev *in6_dev = NULL; struct net_device *dev = NULL; - struct netconfmsg *ncm; struct sk_buff *skb; struct ipv6_devconf *devconf; int ifindex; int err; - err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_ipv6_policy, extack); + err = inet6_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) return err; @@ -3495,8 +3530,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (!addrconf_link_ready(dev)) { /* device is not ready yet. */ - pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", - dev->name); + pr_debug("ADDRCONF(NETDEV_UP): %s: link is not ready\n", + dev->name); break; } @@ -5120,6 +5155,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, if (idev) { err = in6_dump_addrs(idev, skb, cb, s_ip_idx, &fillargs); + if (err > 0) + err = 0; } goto put_tgt_net; } @@ -5179,6 +5216,52 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } +static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrmsg *ifm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for get address request"); + return -EINVAL; + } + + ifm = nlmsg_data(nlh); + if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX, + ifa_ipv6_policy, extack); + if (err) + return err; + + for (i = 0; i <= IFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFA_TARGET_NETNSID: + case IFA_ADDRESS: + case IFA_LOCAL: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get address request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -5199,8 +5282,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct sk_buff *skb; int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, - extack); + err = inet6_rtm_valid_getaddr_req(in_skb, nlh, tb, extack); if (err < 0) return err; @@ -6822,6 +6904,11 @@ static int __net_init addrconf_init_net(struct net *net) if (!dflt) goto err_alloc_dflt; + if (sysctl_devconf_inherit_init_net == 1 && !net_eq(net, &init_net)) { + memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf)); + memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt)); + } + /* these will be inherited by all namespaces */ dflt->autoconf = ipv6_defaults.autoconf; dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 0d1ee82ee55b..d43d076c98f5 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -523,6 +523,50 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } +static int ip6addrlbl_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct ifaddrlblmsg *ifal; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) { + NL_SET_ERR_MSG_MOD(extack, "Invalid header for addrlabel get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); + + ifal = nlmsg_data(nlh); + if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for addrlabel get request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*ifal), tb, IFAL_MAX, + ifal_policy, extack); + if (err) + return err; + + for (i = 0; i <= IFAL_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case IFAL_ADDRESS: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in addrlabel get request"); + return -EINVAL; + } + } + + return 0; +} + static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -535,8 +579,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct ip6addrlbl_entry *p; struct sk_buff *skb; - err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, - extack); + err = ip6addrlbl_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) return err; diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index 7da7bf3b7fe3..b858bd5280bf 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -90,10 +90,11 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { int transport_offset = skb_transport_offset(skb); struct guehdr *guehdr; - size_t optlen; + size_t len, optlen; int ret; - if (skb->len < sizeof(struct udphdr) + sizeof(struct guehdr)) + len = sizeof(struct udphdr) + sizeof(struct guehdr); + if (!pskb_may_pull(skb, len)) return -EINVAL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; @@ -128,6 +129,10 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, optlen = guehdr->hlen << 2; + if (!pskb_may_pull(skb, len + optlen)) + return -EINVAL; + + guehdr = (struct guehdr *)&udp_hdr(skb)[1]; if (validate_gue_flags(guehdr, optlen)) return -EINVAL; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4a1a86e9c0e9..e081e69d534e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -534,13 +534,9 @@ static int ip6erspan_rcv(struct sk_buff *skb, struct ip6_tnl *tunnel; u8 ver; - if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr)))) - return PACKET_REJECT; - ipv6h = ipv6_hdr(skb); ershdr = (struct erspan_base_hdr *)skb->data; ver = ershdr->ver; - tpi->key = cpu_to_be32(get_session_id(ershdr)); tunnel = ip6gre_tunnel_lookup(skb->dev, &ipv6h->saddr, &ipv6h->daddr, tpi->key, @@ -922,6 +918,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, __u8 dsfield = false; struct flowi6 fl6; int err = -EINVAL; + __be16 proto; __u32 mtu; int nhoff; int thoff; @@ -1035,8 +1032,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, } /* Push GRE header. */ - gre_build_header(skb, 8, TUNNEL_SEQ, - htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++)); + proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN) + : htons(ETH_P_ERSPAN2); + gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++)); /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) @@ -1169,6 +1167,10 @@ static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t, t->parms.i_flags = p->i_flags; t->parms.o_flags = p->o_flags; t->parms.fwmark = p->fwmark; + t->parms.erspan_ver = p->erspan_ver; + t->parms.index = p->index; + t->parms.dir = p->dir; + t->parms.hwid = p->hwid; dst_cache_reset(&t->dst_cache); } @@ -2025,9 +2027,9 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); struct __ip6_tnl_parm p; - struct ip6_tnl *t; t = ip6gre_changelink_common(dev, tb, data, &p, extack); if (IS_ERR(t)) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 30337b38274b..cc01aa3f2b5e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1516,6 +1516,9 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) continue; rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); list_del_rcu(&c->list); + call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), + FIB_EVENT_ENTRY_DEL, + (struct mfc6_cache *)c, mrt->id); mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); mr_cache_put(c); } @@ -1524,10 +1527,6 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) spin_lock_bh(&mfc_unres_lock); list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); - call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), - FIB_EVENT_ENTRY_DEL, - (struct mfc6_cache *)c, - mrt->id); mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 21f6deb2aec9..42f3f5cd349f 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -940,6 +940,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) { return __ipv6_dev_mc_inc(dev, addr, MCAST_EXCLUDE); } +EXPORT_SYMBOL(ipv6_dev_mc_inc); /* * device multicast group del @@ -987,6 +988,7 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) return err; } +EXPORT_SYMBOL(ipv6_dev_mc_dec); /* * check if the interface/address pair is valid diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c index 9405b04eecc6..55e2ac179f28 100644 --- a/net/ipv6/mcast_snoop.c +++ b/net/ipv6/mcast_snoop.c @@ -41,6 +41,8 @@ static int ipv6_mc_check_ip6hdr(struct sk_buff *skb) if (skb->len < len || len <= offset) return -EINVAL; + skb_set_transport_header(skb, offset); + return 0; } @@ -77,27 +79,27 @@ static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb) len += sizeof(struct mld2_report); - return pskb_may_pull(skb, len) ? 0 : -EINVAL; + return ipv6_mc_may_pull(skb, len) ? 0 : -EINVAL; } static int ipv6_mc_check_mld_query(struct sk_buff *skb) { + unsigned int transport_len = ipv6_transport_len(skb); struct mld_msg *mld; - unsigned int len = skb_transport_offset(skb); + unsigned int len; /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) return -EINVAL; - len += sizeof(struct mld_msg); - if (skb->len < len) - return -EINVAL; - /* MLDv1? */ - if (skb->len != len) { + if (transport_len != sizeof(struct mld_msg)) { /* or MLDv2? */ - len += sizeof(struct mld2_query) - sizeof(struct mld_msg); - if (skb->len < len || !pskb_may_pull(skb, len)) + if (transport_len < sizeof(struct mld2_query)) + return -EINVAL; + + len = skb_transport_offset(skb) + sizeof(struct mld2_query); + if (!ipv6_mc_may_pull(skb, len)) return -EINVAL; } @@ -115,7 +117,13 @@ static int ipv6_mc_check_mld_query(struct sk_buff *skb) static int ipv6_mc_check_mld_msg(struct sk_buff *skb) { - struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb); + unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); + struct mld_msg *mld; + + if (!ipv6_mc_may_pull(skb, len)) + return -EINVAL; + + mld = (struct mld_msg *)skb_transport_header(skb); switch (mld->mld_type) { case ICMPV6_MGM_REDUCTION: @@ -136,49 +144,30 @@ static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb) return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo); } -static int __ipv6_mc_check_mld(struct sk_buff *skb, - struct sk_buff **skb_trimmed) - +int ipv6_mc_check_icmpv6(struct sk_buff *skb) { - struct sk_buff *skb_chk = NULL; - unsigned int transport_len; - unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); - int ret = -EINVAL; + unsigned int len = skb_transport_offset(skb) + sizeof(struct icmp6hdr); + unsigned int transport_len = ipv6_transport_len(skb); + struct sk_buff *skb_chk; - transport_len = ntohs(ipv6_hdr(skb)->payload_len); - transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr); + if (!ipv6_mc_may_pull(skb, len)) + return -EINVAL; skb_chk = skb_checksum_trimmed(skb, transport_len, ipv6_mc_validate_checksum); if (!skb_chk) - goto err; + return -EINVAL; - if (!pskb_may_pull(skb_chk, len)) - goto err; - - ret = ipv6_mc_check_mld_msg(skb_chk); - if (ret) - goto err; - - if (skb_trimmed) - *skb_trimmed = skb_chk; - /* free now unneeded clone */ - else if (skb_chk != skb) + if (skb_chk != skb) kfree_skb(skb_chk); - ret = 0; - -err: - if (ret && skb_chk && skb_chk != skb) - kfree_skb(skb_chk); - - return ret; + return 0; } +EXPORT_SYMBOL(ipv6_mc_check_icmpv6); /** * ipv6_mc_check_mld - checks whether this is a sane MLD packet * @skb: the skb to validate - * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) * * Checks whether an IPv6 packet is a valid MLD packet. If so sets * skb transport header accordingly and returns zero. @@ -188,18 +177,10 @@ err: * -ENOMSG: IP header validation succeeded but it is not an MLD packet. * -ENOMEM: A memory allocation failure happened. * - * Optionally, an skb pointer might be provided via skb_trimmed (or set it - * to NULL): After parsing an MLD packet successfully it will point to - * an skb which has its tail aligned to the IP packet end. This might - * either be the originally provided skb or a trimmed, cloned version if - * the skb frame had data beyond the IP packet. A cloned skb allows us - * to leave the original skb and its full frame unchanged (which might be - * desirable for layer 2 frame jugglers). - * * Caller needs to set the skb network header and free any returned skb if it * differs from the provided skb. */ -int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) +int ipv6_mc_check_mld(struct sk_buff *skb) { int ret; @@ -211,6 +192,10 @@ int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) if (ret < 0) return ret; - return __ipv6_mc_check_mld(skb, skb_trimmed); + ret = ipv6_mc_check_icmpv6(skb); + if (ret < 0) + return ret; + + return ipv6_mc_check_mld_msg(skb); } EXPORT_SYMBOL(ipv6_mc_check_mld); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 181da2c40f9a..cb1b4772dac0 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -136,6 +136,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) } #endif +static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); + static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); @@ -177,9 +180,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, const struct frag_hdr *fhdr, int nhoff) { - struct sk_buff *prev, *next; unsigned int payload_len; - int offset, end; + struct net_device *dev; + struct sk_buff *prev; + int offset, end, err; u8 ecn; if (fq->q.flags & INET_FRAG_COMPLETE) { @@ -254,55 +258,18 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, goto err; } - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = fq->q.fragments_tail; - if (!prev || prev->ip_defrag_offset < offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { - if (next->ip_defrag_offset >= offset) - break; /* bingo! */ - prev = next; - } - -found: - /* RFC5722, Section 4: - * When reassembling an IPv6 datagram, if - * one or more its constituent fragments is determined to be an - * overlapping fragment, the entire datagram (and any constituent - * fragments, including those not yet received) MUST be silently - * discarded. - */ - - /* Check for overlap with preceding fragment. */ - if (prev && - (prev->ip_defrag_offset + prev->len) > offset) - goto discard_fq; - - /* Look for overlap with succeeding segment. */ - if (next && next->ip_defrag_offset < end) - goto discard_fq; - - /* Note : skb->ip_defrag_offset and skb->dev share the same location */ - if (skb->dev) - fq->iif = skb->dev->ifindex; + /* Note : skb->rbnode and skb->dev share the same location. */ + dev = skb->dev; /* Makes sure compiler wont do silly aliasing games */ barrier(); - skb->ip_defrag_offset = offset; - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - fq->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - fq->q.fragments = skb; + prev = fq->q.fragments_tail; + err = inet_frag_queue_insert(&fq->q, skb, offset, end); + if (err) + goto insert_error; + + if (dev) + fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; @@ -319,11 +286,25 @@ found: fq->q.flags |= INET_FRAG_FIRST_IN; } - return 0; + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + fq->q.meat == fq->q.len) { + unsigned long orefdst = skb->_skb_refdst; -discard_fq: + skb->_skb_refdst = 0UL; + err = nf_ct_frag6_reasm(fq, skb, prev, dev); + skb->_skb_refdst = orefdst; + return err; + } + + skb_dst_drop(skb); + return -EINPROGRESS; + +insert_error: + if (err == IPFRAG_DUP) + goto err; inet_frag_kill(&fq->q); err: + skb_dst_drop(skb); return -EINVAL; } @@ -333,147 +314,67 @@ err: * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. - * - * returns true if *prev skb has been transformed into the reassembled - * skb, false otherwise. */ -static bool -nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) +static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev) { - struct sk_buff *fp, *head = fq->q.fragments; - int payload_len, delta; + void *reasm_data; + int payload_len; u8 ecn; inet_frag_kill(&fq->q); - WARN_ON(head == NULL); - WARN_ON(head->ip_defrag_offset != 0); - ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) - return false; + goto err; - /* Unfragmented part is taken from the first segment. */ - payload_len = ((head->data - skb_network_header(head)) - + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); + if (!reasm_data) + goto err; + + payload_len = ((skb->data - skb_network_header(skb)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", payload_len); - return false; - } - - delta = - head->truesize; - - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - return false; - - delta += head->truesize; - if (delta) - add_frag_mem_limit(fq->q.net, delta); - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (clone == NULL) - return false; - - clone->next = head->next; - head->next = clone; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = clone->data_len = head->data_len - plen; - head->data_len -= clone->len; - head->len -= clone->len; - clone->csum = 0; - clone->ip_summed = head->ip_summed; - - add_frag_mem_limit(fq->q.net, clone->truesize); - } - - /* morph head into last received skb: prev. - * - * This allows callers of ipv6 conntrack defrag to continue - * to use the last skb(frag) passed into the reasm engine. - * The last skb frag 'silently' turns into the full reassembled skb. - * - * Since prev is also part of q->fragments we have to clone it first. - */ - if (head != prev) { - struct sk_buff *iter; - - fp = skb_clone(prev, GFP_ATOMIC); - if (!fp) - return false; - - fp->next = prev->next; - - iter = head; - while (iter) { - if (iter->next == prev) { - iter->next = fp; - break; - } - iter = iter->next; - } - - skb_morph(prev, head); - prev->next = head->next; - consume_skb(head); - head = prev; + goto err; } /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ - skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0]; - memmove(head->head + sizeof(struct frag_hdr), head->head, - (head->data - head->head) - sizeof(struct frag_hdr)); - head->mac_header += sizeof(struct frag_hdr); - head->network_header += sizeof(struct frag_hdr); + skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0]; + memmove(skb->head + sizeof(struct frag_hdr), skb->head, + (skb->data - skb->head) - sizeof(struct frag_hdr)); + skb->mac_header += sizeof(struct frag_hdr); + skb->network_header += sizeof(struct frag_hdr); - skb_shinfo(head)->frag_list = head->next; - skb_reset_transport_header(head); - skb_push(head, head->data - skb_network_header(head)); + skb_reset_transport_header(skb); - for (fp = head->next; fp; fp = fp->next) { - head->data_len += fp->len; - head->len += fp->len; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; - fp->sk = NULL; - } - sub_frag_mem_limit(fq->q.net, head->truesize); + inet_frag_reasm_finish(&fq->q, skb, reasm_data); - head->ignore_df = 1; - skb_mark_not_on_list(head); - head->dev = dev; - head->tstamp = fq->q.stamp; - ipv6_hdr(head)->payload_len = htons(payload_len); - ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); - IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; + skb->ignore_df = 1; + skb->dev = dev; + ipv6_hdr(skb)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); + IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_partial(skb_network_header(head), - skb_network_header_len(head), - head->csum); + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_partial(skb_network_header(skb), + skb_network_header_len(skb), + skb->csum); fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; + fq->q.last_run_head = NULL; - return true; + return 0; + +err: + inet_frag_kill(&fq->q); + return -EINVAL; } /* @@ -542,7 +443,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { u16 savethdr = skb->transport_header; - struct net_device *dev = skb->dev; int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; @@ -565,10 +465,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && - fhdr->frag_off & htons(IP6_MF)) - return -EINVAL; - skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); @@ -580,31 +476,17 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) spin_lock_bh(&fq->q.lock); ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff); - if (ret < 0) { - if (ret == -EPROTO) { - skb->transport_header = savethdr; - ret = 0; - } - goto out_unlock; + if (ret == -EPROTO) { + skb->transport_header = savethdr; + ret = 0; } /* after queue has assumed skb ownership, only 0 or -EINPROGRESS * must be returned. */ - ret = -EINPROGRESS; - if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) { - unsigned long orefdst = skb->_skb_refdst; + if (ret) + ret = -EINPROGRESS; - skb->_skb_refdst = 0UL; - if (nf_ct_frag6_reasm(fq, skb, dev)) - ret = 0; - skb->_skb_refdst = orefdst; - } else { - skb_dst_drop(skb); - } - -out_unlock: spin_unlock_bh(&fq->q.lock); inet_frag_put(&fq->q); return ret; diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 23022447eb49..9c914db44bec 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -225,7 +225,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, skb->len - hdrlen, 0)); } - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) return 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 36a3d8dc61f5..24264d0a4b85 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -69,8 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) static struct inet_frags ip6_frags; -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, - struct net_device *dev); +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); static void ip6_frag_expire(struct timer_list *t) { @@ -111,21 +111,26 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff, u32 *prob_offset) { - struct sk_buff *prev, *next; - struct net_device *dev; - int offset, end, fragsize; struct net *net = dev_net(skb_dst(skb)->dev); + int offset, end, fragsize; + struct sk_buff *prev_tail; + struct net_device *dev; + int err = -ENOENT; u8 ecn; if (fq->q.flags & INET_FRAG_COMPLETE) goto err; + err = -EINVAL; offset = ntohs(fhdr->frag_off) & ~0x7; end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb); + /* note that if prob_offset is set, the skb is freed elsewhere, + * we do not free it here. + */ return -1; } @@ -170,62 +175,27 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (end == offset) goto discard_fq; + err = -ENOMEM; /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) goto discard_fq; - if (pskb_trim_rcsum(skb, end - offset)) + err = pskb_trim_rcsum(skb, end - offset); + if (err) goto discard_fq; - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = fq->q.fragments_tail; - if (!prev || prev->ip_defrag_offset < offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { - if (next->ip_defrag_offset >= offset) - break; /* bingo! */ - prev = next; - } - -found: - /* RFC5722, Section 4, amended by Errata ID : 3089 - * When reassembling an IPv6 datagram, if - * one or more its constituent fragments is determined to be an - * overlapping fragment, the entire datagram (and any constituent - * fragments) MUST be silently discarded. - */ - - /* Check for overlap with preceding fragment. */ - if (prev && - (prev->ip_defrag_offset + prev->len) > offset) - goto discard_fq; - - /* Look for overlap with succeeding segment. */ - if (next && next->ip_defrag_offset < end) - goto discard_fq; - - /* Note : skb->ip_defrag_offset and skb->sk share the same location */ + /* Note : skb->rbnode and skb->dev share the same location. */ dev = skb->dev; - if (dev) - fq->iif = dev->ifindex; /* Makes sure compiler wont do silly aliasing games */ barrier(); - skb->ip_defrag_offset = offset; - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - fq->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - fq->q.fragments = skb; + prev_tail = fq->q.fragments_tail; + err = inet_frag_queue_insert(&fq->q, skb, offset, end); + if (err) + goto insert_error; + + if (dev) + fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; @@ -246,44 +216,48 @@ found: if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { - int res; unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - res = ip6_frag_reasm(fq, prev, dev); + err = ip6_frag_reasm(fq, skb, prev_tail, dev); skb->_skb_refdst = orefdst; - return res; + return err; } skb_dst_drop(skb); - return -1; + return -EINPROGRESS; +insert_error: + if (err == IPFRAG_DUP) { + kfree_skb(skb); + return -EINVAL; + } + err = -EINVAL; + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASM_OVERLAPS); discard_fq: inet_frag_kill(&fq->q); -err: __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); +err: kfree_skb(skb); - return -1; + return err; } /* * Check if this packet is complete. - * Returns NULL on failure by any reason, and pointer - * to current nexthdr field in reassembled frame. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. */ -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, - struct net_device *dev) +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev) { struct net *net = container_of(fq->q.net, struct net, ipv6.frags); - struct sk_buff *fp, *head = fq->q.fragments; - int payload_len, delta; unsigned int nhoff; - int sum_truesize; + void *reasm_data; + int payload_len; u8 ecn; inet_frag_kill(&fq->q); @@ -292,121 +266,40 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, if (unlikely(ecn == 0xff)) goto out_fail; - /* Make the one we just received the head. */ - if (prev) { - head = prev->next; - fp = skb_clone(head, GFP_ATOMIC); + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); + if (!reasm_data) + goto out_oom; - if (!fp) - goto out_oom; - - fp->next = head->next; - if (!fp->next) - fq->q.fragments_tail = fp; - prev->next = fp; - - skb_morph(head, fq->q.fragments); - head->next = fq->q.fragments->next; - - consume_skb(fq->q.fragments); - fq->q.fragments = head; - } - - WARN_ON(head == NULL); - WARN_ON(head->ip_defrag_offset != 0); - - /* Unfragmented part is taken from the first segment. */ - payload_len = ((head->data - skb_network_header(head)) - + payload_len = ((skb->data - skb_network_header(skb)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) goto out_oversize; - delta = - head->truesize; - - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - goto out_oom; - - delta += head->truesize; - if (delta) - add_frag_mem_limit(fq->q.net, delta); - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (!clone) - goto out_oom; - clone->next = head->next; - head->next = clone; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = clone->data_len = head->data_len - plen; - head->data_len -= clone->len; - head->len -= clone->len; - clone->csum = 0; - clone->ip_summed = head->ip_summed; - add_frag_mem_limit(fq->q.net, clone->truesize); - } - /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ nhoff = fq->nhoffset; - skb_network_header(head)[nhoff] = skb_transport_header(head)[0]; - memmove(head->head + sizeof(struct frag_hdr), head->head, - (head->data - head->head) - sizeof(struct frag_hdr)); - if (skb_mac_header_was_set(head)) - head->mac_header += sizeof(struct frag_hdr); - head->network_header += sizeof(struct frag_hdr); + skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0]; + memmove(skb->head + sizeof(struct frag_hdr), skb->head, + (skb->data - skb->head) - sizeof(struct frag_hdr)); + if (skb_mac_header_was_set(skb)) + skb->mac_header += sizeof(struct frag_hdr); + skb->network_header += sizeof(struct frag_hdr); - skb_reset_transport_header(head); - skb_push(head, head->data - skb_network_header(head)); + skb_reset_transport_header(skb); - sum_truesize = head->truesize; - for (fp = head->next; fp;) { - bool headstolen; - int delta; - struct sk_buff *next = fp->next; + inet_frag_reasm_finish(&fq->q, skb, reasm_data); - sum_truesize += fp->truesize; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - - if (skb_try_coalesce(head, fp, &headstolen, &delta)) { - kfree_skb_partial(fp, headstolen); - } else { - fp->sk = NULL; - if (!skb_shinfo(head)->frag_list) - skb_shinfo(head)->frag_list = fp; - head->data_len += fp->len; - head->len += fp->len; - head->truesize += fp->truesize; - } - fp = next; - } - sub_frag_mem_limit(fq->q.net, sum_truesize); - - skb_mark_not_on_list(head); - head->dev = dev; - head->tstamp = fq->q.stamp; - ipv6_hdr(head)->payload_len = htons(payload_len); - ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); - IP6CB(head)->nhoff = nhoff; - IP6CB(head)->flags |= IP6SKB_FRAGMENTED; - IP6CB(head)->frag_max_size = fq->q.max_size; + skb->dev = dev; + ipv6_hdr(skb)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); + IP6CB(skb)->nhoff = nhoff; + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; + IP6CB(skb)->frag_max_size = fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ - skb_postpush_rcsum(head, skb_network_header(head), - skb_network_header_len(head)); + skb_postpush_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); @@ -414,6 +307,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; + fq->q.last_run_head = NULL; return 1; out_oversize: @@ -464,10 +358,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && - fhdr->frag_off & htons(IP6_MF)) - goto fail_hdr; - iif = skb->dev ? skb->dev->ifindex : 0; fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { @@ -485,6 +375,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) if (prob_offset) { __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INHDRERRORS); + /* icmpv6_param_prob() calls kfree_skb(skb) */ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset); } return ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 40b225f87d5e..dc066fdf7e46 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4251,17 +4251,6 @@ struct rt6_nh { struct list_head next; }; -static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) -{ - struct rt6_nh *nh; - - list_for_each_entry(nh, rt6_nh_list, next) { - pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n", - &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, - nh->r_cfg.fc_ifindex); - } -} - static int ip6_route_info_append(struct net *net, struct list_head *rt6_nh_list, struct fib6_info *rt, @@ -4407,7 +4396,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nh->fib6_info = NULL; if (err) { if (replace && nhn) - ip6_print_replace_route_err(&rt6_nh_list); + NL_SET_ERR_MSG_MOD(extack, + "multipath route replace failed (check consistency of installed routes)"); err_nh = nh; goto add_errout; } @@ -4822,6 +4812,73 @@ int rt6_dump_route(struct fib6_info *rt, void *p_arg) arg->cb->nlh->nlmsg_seq, flags); } +static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid header for get route request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || + (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || + rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || + rtm->rtm_type) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request"); + return -EINVAL; + } + if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid flags for get route request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); + if (err) + return err; + + if ((tb[RTA_SRC] && !rtm->rtm_src_len) || + (tb[RTA_DST] && !rtm->rtm_dst_len)) { + NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_SRC: + case RTA_DST: + case RTA_IIF: + case RTA_OIF: + case RTA_MARK: + case RTA_UID: + case RTA_SPORT: + case RTA_DPORT: + case RTA_IP_PROTO: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); + return -EINVAL; + } + } + + return 0; +} + static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -4836,8 +4893,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct flowi6 fl6 = {}; bool fibmatch; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, - extack); + err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b81eb7cb815e..e51cda79f0cc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1864,7 +1864,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) refcount_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), - (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), tp->snd_cwnd, state == TCP_LISTEN ? fastopenq->max_qlen : diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7c3505006f8e..2596ffdeebea 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -102,7 +102,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, hash2_nulladdr); } -static void udp_v6_rehash(struct sock *sk) +void udp_v6_rehash(struct sock *sk) { u16 new_hash = ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, @@ -1132,15 +1132,23 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) + if (hlen + cork->gso_size > cork->fragsize) { + kfree_skb(skb); return -EINVAL; - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + } + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + kfree_skb(skb); return -EINVAL; - if (udp_sk(sk)->no_check6_tx) + } + if (udp_sk(sk)->no_check6_tx) { + kfree_skb(skb); return -EINVAL; + } if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) + dst_xfrm(skb_dst(skb))) { + kfree_skb(skb); return -EIO; + } skb_shinfo(skb)->gso_size = cork->gso_size; skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 5730e6503cb4..20e324b6f358 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -13,6 +13,7 @@ int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, __be32, struct udp_table *); int udp_v6_get_port(struct sock *sk, unsigned short snum); +void udp_v6_rehash(struct sock *sk); int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index a125aebc29e5..f35907836444 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -49,6 +49,7 @@ struct proto udplitev6_prot = { .recvmsg = udpv6_recvmsg, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 80d8ebc30ded..d65aa019ce85 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1498,6 +1498,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) sta->sta.tdls = true; + if (sta->sta.tdls && sdata->vif.type == NL80211_IFTYPE_STATION && + !sdata->u.mgd.associated) + return -EINVAL; + err = sta_apply_parameters(local, sta, params); if (err) { sta_info_free(local, sta); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 45aad3d3108c..bb4d71efb6fb 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -231,7 +231,7 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr_3addr hdr; u8 category; u8 action_code; - } __packed action; + } __packed __aligned(2) action; if (!sdata) return; @@ -2723,7 +2723,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) skb_set_queue_mapping(skb, q); if (!--mesh_hdr->ttl) { - IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl); + if (!is_multicast_ether_addr(hdr->addr1)) + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, + dropped_frames_ttl); goto out; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 7d55d4c04088..2662a23c658e 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1209,21 +1209,57 @@ static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, }; +static int mpls_netconf_valid_get_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid header for netconf get request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_mpls_policy, extack); + + err = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb, + NETCONFA_MAX, devconf_mpls_policy, extack); + if (err) + return err; + + for (i = 0; i <= NETCONFA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case NETCONFA_IFINDEX: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request"); + return -EINVAL; + } + } + + return 0; +} + static int mpls_netconf_get_devconf(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX + 1]; - struct netconfmsg *ncm; struct net_device *dev; struct mpls_dev *mdev; struct sk_buff *skb; int ifindex; int err; - err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, - devconf_mpls_policy, extack); + err = mpls_netconf_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) goto errout; @@ -2236,6 +2272,64 @@ errout: rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err); } +static int mpls_valid_getroute_req(struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rtmsg *rtm; + int i, err; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid header for get route request"); + return -EINVAL; + } + + if (!netlink_strict_get_check(skb)) + return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); + + rtm = nlmsg_data(nlh); + if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) || + rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_table || + rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) { + NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request"); + return -EINVAL; + } + if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid flags for get route request"); + return -EINVAL; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); + if (err) + return err; + + if ((tb[RTA_DST] || tb[RTA_NEWDST]) && !rtm->rtm_dst_len) { + NL_SET_ERR_MSG_MOD(extack, "rtm_dst_len must be 20 for MPLS"); + return -EINVAL; + } + + for (i = 0; i <= RTA_MAX; i++) { + if (!tb[i]) + continue; + + switch (i) { + case RTA_DST: + case RTA_NEWDST: + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); + return -EINVAL; + } + } + + return 0; +} + static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct netlink_ext_ack *extack) { @@ -2255,8 +2349,7 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, u8 n_labels; int err; - err = nlmsg_parse(in_nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_mpls_policy, extack); + err = mpls_valid_getroute_req(in_skb, in_nlh, tb, extack); if (err < 0) goto errout; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index beb3a69ce1d4..fefd63a243f2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -174,7 +174,7 @@ config NF_CT_PROTO_DCCP If unsure, say Y. config NF_CT_PROTO_GRE - tristate + bool config NF_CT_PROTO_SCTP bool 'SCTP protocol connection tracking support' diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1ae65a314d7a..e66067befa42 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -13,6 +13,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -25,8 +26,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_OSF) += nfnetlink_osf.o # connection tracking obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o -obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o - # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o obj-$(CONFIG_NF_CT_NETLINK_TIMEOUT) += nfnetlink_cttimeout.o diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index fe9abf3cc10a..e969dad66991 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -53,6 +53,7 @@ #endif #include +#include EXPORT_SYMBOL(register_ip_vs_scheduler); @@ -70,6 +71,29 @@ EXPORT_SYMBOL(ip_vs_get_debug_level); #endif EXPORT_SYMBOL(ip_vs_new_conn_out); +#ifdef CONFIG_IP_VS_PROTO_TCP +INDIRECT_CALLABLE_DECLARE(int + tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)); +#endif + +#ifdef CONFIG_IP_VS_PROTO_UDP +INDIRECT_CALLABLE_DECLARE(int + udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)); +#endif + +#if defined(CONFIG_IP_VS_PROTO_TCP) && defined(CONFIG_IP_VS_PROTO_UDP) +#define SNAT_CALL(f, ...) \ + INDIRECT_CALL_2(f, tcp_snat_handler, udp_snat_handler, __VA_ARGS__) +#elif defined(CONFIG_IP_VS_PROTO_TCP) +#define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, tcp_snat_handler, __VA_ARGS__) +#elif defined(CONFIG_IP_VS_PROTO_UDP) +#define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, udp_snat_handler, __VA_ARGS__) +#else +#define SNAT_CALL(f, ...) f(__VA_ARGS__) +#endif + static unsigned int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); @@ -478,7 +502,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, */ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) { iph->hdr_flags ^= IP_VS_HDR_INVERSE; - cp = pp->conn_in_get(svc->ipvs, svc->af, skb, iph); + cp = INDIRECT_CALL_1(pp->conn_in_get, + ip_vs_conn_in_get_proto, svc->ipvs, + svc->af, skb, iph); iph->hdr_flags ^= IP_VS_HDR_INVERSE; if (cp) { @@ -972,7 +998,8 @@ static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(ipvs, AF_INET, skb, &ciph); + cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto, + ipvs, AF_INET, skb, &ciph); if (!cp) return NF_ACCEPT; @@ -1028,7 +1055,8 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, return NF_ACCEPT; /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(ipvs, AF_INET6, skb, &ciph); + cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto, + ipvs, AF_INET6, skb, &ciph); if (!cp) return NF_ACCEPT; @@ -1263,7 +1291,8 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, goto drop; /* mangle the packet */ - if (pp->snat_handler && !pp->snat_handler(skb, pp, cp, iph)) + if (pp->snat_handler && + !SNAT_CALL(pp->snat_handler, skb, pp, cp, iph)) goto drop; #ifdef CONFIG_IP_VS_IPV6 @@ -1389,7 +1418,8 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(ipvs, af, skb, &iph); + cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto, + ipvs, af, skb, &iph); if (likely(cp)) { if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) @@ -1644,7 +1674,8 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, /* The embedded headers contain source and dest in reverse order. * For IPIP this is error for request, not for reply. */ - cp = pp->conn_in_get(ipvs, AF_INET, skb, &ciph); + cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, + ipvs, AF_INET, skb, &ciph); if (!cp) { int v; @@ -1796,7 +1827,8 @@ static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, /* The embedded headers contain source and dest in reverse order * if not from localhost */ - cp = pp->conn_in_get(ipvs, AF_INET6, skb, &ciph); + cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, + ipvs, AF_INET6, skb, &ciph); if (!cp) { int v; @@ -1925,7 +1957,8 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(ipvs, af, skb, &iph); + cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, + ipvs, af, skb, &iph); conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) { diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 432141f04af3..7d6318664eb2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2220,6 +2220,18 @@ static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user u->tcp_fin_timeout, u->udp_timeout); +#ifdef CONFIG_IP_VS_PROTO_TCP + if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || + u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { + return -EINVAL; + } +#endif + +#ifdef CONFIG_IP_VS_PROTO_UDP + if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) + return -EINVAL; +#endif + #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 5320d39976e1..480598cb0f05 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -129,7 +129,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = { .conn_out_get = ah_esp_conn_out_get, .snat_handler = NULL, .dnat_handler = NULL, - .csum_check = NULL, .state_transition = NULL, .register_app = NULL, .unregister_app = NULL, @@ -152,7 +151,6 @@ struct ip_vs_protocol ip_vs_protocol_esp = { .conn_out_get = ah_esp_conn_out_get, .snat_handler = NULL, .dnat_handler = NULL, - .csum_check = NULL, .state_transition = NULL, .register_app = NULL, .unregister_app = NULL, diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index b0cd7d08f2a7..bc3d1625ecc8 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -9,6 +9,9 @@ #include #include +static int +sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); + static int sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, @@ -105,7 +108,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + if (!sctp_csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ @@ -152,7 +155,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + if (!sctp_csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ @@ -587,7 +590,6 @@ struct ip_vs_protocol ip_vs_protocol_sctp = { .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = sctp_snat_handler, .dnat_handler = sctp_dnat_handler, - .csum_check = sctp_csum_check, .state_name = sctp_state_name, .state_transition = sctp_state_transition, .app_conn_bind = sctp_app_conn_bind, diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 1770fc6ce960..479419759983 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -28,9 +28,13 @@ #include #include #include +#include #include +static int +tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); + static int tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, @@ -143,7 +147,7 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph, } -static int +INDIRECT_CALLABLE_SCOPE int tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { @@ -166,7 +170,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + if (!tcp_csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ @@ -192,7 +196,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = (cp->app && pp->csum_check) ? + skb->ip_summed = cp->app ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -244,7 +248,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + if (!tcp_csum_check(cp->af, skb, pp)) return 0; /* @@ -275,7 +279,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = (cp->app && pp->csum_check) ? + skb->ip_summed = cp->app ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -736,7 +740,6 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = tcp_snat_handler, .dnat_handler = tcp_dnat_handler, - .csum_check = tcp_csum_check, .state_name = tcp_state_name, .state_transition = tcp_state_transition, .app_conn_bind = tcp_app_conn_bind, diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 0f53c49025f8..646c384910fb 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -23,11 +23,15 @@ #include #include #include +#include #include #include #include +static int +udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); + static int udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, @@ -133,7 +137,7 @@ udp_partial_csum_update(int af, struct udphdr *uhdr, } -static int +INDIRECT_CALLABLE_SCOPE int udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { @@ -156,7 +160,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + if (!udp_csum_check(cp->af, skb, pp)) return 0; /* @@ -186,7 +190,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = (cp->app && pp->csum_check) ? + skb->ip_summed = cp->app ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -239,7 +243,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, int ret; /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + if (!udp_csum_check(cp->af, skb, pp)) return 0; /* @@ -270,7 +274,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = (cp->app && pp->csum_check) ? + skb->ip_summed = cp->app ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -494,7 +498,6 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .conn_out_get = ip_vs_conn_out_get_proto, .snat_handler = udp_snat_handler, .dnat_handler = udp_dnat_handler, - .csum_check = udp_csum_check, .state_transition = udp_state_transition, .state_name = udp_state_name, .register_app = udp_register_app, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 741b533148ba..815956ac5a76 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -222,6 +222,24 @@ static u32 hash_conntrack(const struct net *net, return scale_hash(hash_conntrack_raw(tuple, net)); } +static bool nf_ct_get_tuple_ports(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ struct { + __be16 sport; + __be16 dport; + } _inet_hdr, *inet_hdr; + + /* Actually only need first 4 bytes to get ports. */ + inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); + if (!inet_hdr) + return false; + + tuple->src.u.udp.port = inet_hdr->sport; + tuple->dst.u.udp.port = inet_hdr->dport; + return true; +} + static bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff, @@ -229,16 +247,11 @@ nf_ct_get_tuple(const struct sk_buff *skb, u_int16_t l3num, u_int8_t protonum, struct net *net, - struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_l4proto *l4proto) + struct nf_conntrack_tuple *tuple) { unsigned int size; const __be32 *ap; __be32 _addrs[8]; - struct { - __be16 sport; - __be16 dport; - } _inet_hdr, *inet_hdr; memset(tuple, 0, sizeof(*tuple)); @@ -274,16 +287,36 @@ nf_ct_get_tuple(const struct sk_buff *skb, tuple->dst.protonum = protonum; tuple->dst.dir = IP_CT_DIR_ORIGINAL; - if (unlikely(l4proto->pkt_to_tuple)) - return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); + switch (protonum) { +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: + return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple); +#endif + case IPPROTO_ICMP: + return icmp_pkt_to_tuple(skb, dataoff, net, tuple); +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + return gre_pkt_to_tuple(skb, dataoff, net, tuple); +#endif + case IPPROTO_TCP: + case IPPROTO_UDP: /* fallthrough */ + return nf_ct_get_tuple_ports(skb, dataoff, tuple); +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + case IPPROTO_UDPLITE: + return nf_ct_get_tuple_ports(skb, dataoff, tuple); +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + case IPPROTO_SCTP: + return nf_ct_get_tuple_ports(skb, dataoff, tuple); +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP + case IPPROTO_DCCP: + return nf_ct_get_tuple_ports(skb, dataoff, tuple); +#endif + default: + break; + } - /* Actually only need first 4 bytes to get ports. */ - inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); - if (!inet_hdr) - return false; - - tuple->src.u.udp.port = inet_hdr->sport; - tuple->dst.u.udp.port = inet_hdr->dport; return true; } @@ -366,33 +399,20 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, struct net *net, struct nf_conntrack_tuple *tuple) { - const struct nf_conntrack_l4proto *l4proto; u8 protonum; int protoff; - int ret; - - rcu_read_lock(); protoff = get_l4proto(skb, nhoff, l3num, &protonum); - if (protoff <= 0) { - rcu_read_unlock(); + if (protoff <= 0) return false; - } - l4proto = __nf_ct_l4proto_find(protonum); - - ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple, - l4proto); - - rcu_read_unlock(); - return ret; + return nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple); } EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr); bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_tuple *orig) { memset(inverse, 0, sizeof(*inverse)); @@ -415,8 +435,14 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, inverse->dst.protonum = orig->dst.protonum; - if (unlikely(l4proto->invert_tuple)) - return l4proto->invert_tuple(inverse, orig); + switch (orig->dst.protonum) { + case IPPROTO_ICMP: + return nf_conntrack_invert_icmp_tuple(inverse, orig); +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: + return nf_conntrack_invert_icmpv6_tuple(inverse, orig); +#endif + } inverse->src.u.all = orig->dst.u.all; inverse->dst.u.all = orig->src.u.all; @@ -526,11 +552,20 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl) } EXPORT_SYMBOL_GPL(nf_ct_tmpl_free); +static void destroy_gre_conntrack(struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CT_PROTO_GRE + struct nf_conn *master = ct->master; + + if (master) + nf_ct_gre_keymap_destroy(master); +#endif +} + static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - const struct nf_conntrack_l4proto *l4proto; pr_debug("destroy_conntrack(%p)\n", ct); WARN_ON(atomic_read(&nfct->use) != 0); @@ -539,9 +574,9 @@ destroy_conntrack(struct nf_conntrack *nfct) nf_ct_tmpl_free(ct); return; } - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); - if (l4proto->destroy) - l4proto->destroy(ct); + + if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE)) + destroy_gre_conntrack(ct); local_bh_disable(); /* Expectations will have been removed in clean_from_lists, @@ -840,7 +875,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, enum ip_conntrack_info oldinfo; struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (l4proto->allow_clash && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { @@ -1112,7 +1147,7 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) if (!test_bit(IPS_ASSURED_BIT, &ct->status)) return true; - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (l4proto->can_early_drop && l4proto->can_early_drop(ct)) return true; @@ -1342,7 +1377,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); static noinline struct nf_conntrack_tuple_hash * init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, unsigned int dataoff, u32 hash) { @@ -1355,7 +1389,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conn_timeout *timeout_ext; struct nf_conntrack_zone tmp; - if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) { + if (!nf_ct_invert_tuple(&repl_tuple, tuple)) { pr_debug("Can't invert tuple.\n"); return NULL; } @@ -1437,7 +1471,6 @@ resolve_normal_ct(struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int8_t protonum, - const struct nf_conntrack_l4proto *l4proto, const struct nf_hook_state *state) { const struct nf_conntrack_zone *zone; @@ -1450,7 +1483,7 @@ resolve_normal_ct(struct nf_conn *tmpl, if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, state->pf, protonum, state->net, - &tuple, l4proto)) { + &tuple)) { pr_debug("Can't get tuple\n"); return 0; } @@ -1460,7 +1493,7 @@ resolve_normal_ct(struct nf_conn *tmpl, hash = hash_conntrack_raw(&tuple, state->net); h = __nf_conntrack_find_get(state->net, zone, &tuple, hash); if (!h) { - h = init_conntrack(state->net, tmpl, &tuple, l4proto, + h = init_conntrack(state->net, tmpl, &tuple, skb, dataoff, hash); if (!h) return 0; @@ -1522,10 +1555,66 @@ nf_conntrack_handle_icmp(struct nf_conn *tmpl, return ret; } +static int generic_packet(struct nf_conn *ct, struct sk_buff *skb, + enum ip_conntrack_info ctinfo) +{ + const unsigned int *timeout = nf_ct_timeout_lookup(ct); + + if (!timeout) + timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout; + + nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); + return NF_ACCEPT; +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int nf_conntrack_handle_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) +{ + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + return nf_conntrack_tcp_packet(ct, skb, dataoff, + ctinfo, state); + case IPPROTO_UDP: + return nf_conntrack_udp_packet(ct, skb, dataoff, + ctinfo, state); + case IPPROTO_ICMP: + return nf_conntrack_icmp_packet(ct, skb, ctinfo, state); +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: + return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state); +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + case IPPROTO_UDPLITE: + return nf_conntrack_udplite_packet(ct, skb, dataoff, + ctinfo, state); +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + case IPPROTO_SCTP: + return nf_conntrack_sctp_packet(ct, skb, dataoff, + ctinfo, state); +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP + case IPPROTO_DCCP: + return nf_conntrack_dccp_packet(ct, skb, dataoff, + ctinfo, state); +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + return nf_conntrack_gre_packet(ct, skb, dataoff, + ctinfo, state); +#endif + } + + return generic_packet(ct, skb, ctinfo); +} + unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) { - const struct nf_conntrack_l4proto *l4proto; enum ip_conntrack_info ctinfo; struct nf_conn *ct, *tmpl; u_int8_t protonum; @@ -1552,8 +1641,6 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) goto out; } - l4proto = __nf_ct_l4proto_find(protonum); - if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) { ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff, protonum, state); @@ -1567,7 +1654,7 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) } repeat: ret = resolve_normal_ct(tmpl, skb, dataoff, - protonum, l4proto, state); + protonum, state); if (ret < 0) { /* Too stressed to deal. */ NF_CT_STAT_INC_ATOMIC(state->net, drop); @@ -1583,7 +1670,7 @@ repeat: goto out; } - ret = l4proto->packet(ct, skb, dataoff, ctinfo, state); + ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state); if (ret <= 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ @@ -1614,19 +1701,6 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_in); -bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig) -{ - bool ret; - - rcu_read_lock(); - ret = nf_ct_invert_tuple(inverse, orig, - __nf_ct_l4proto_find(orig->dst.protonum)); - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); - /* Alter reply tuple (maybe alter helper). This is for NAT, and is implicitly racy: see __nf_conntrack_confirm */ void nf_conntrack_alter_reply(struct nf_conn *ct, @@ -1757,7 +1831,6 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) static int nf_conntrack_update(struct net *net, struct sk_buff *skb) { - const struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; enum ip_conntrack_info ctinfo; @@ -1778,10 +1851,8 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb) if (dataoff <= 0) return -1; - l4proto = nf_ct_l4proto_find_get(l4num); - if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, - l4num, net, &tuple, l4proto)) + l4num, net, &tuple)) return -1; if (ct->status & IPS_SRC_NAT) { @@ -2413,15 +2484,10 @@ int nf_conntrack_init_net(struct net *net) nf_conntrack_tstamp_pernet_init(net); nf_conntrack_ecache_pernet_init(net); nf_conntrack_helper_pernet_init(net); + nf_conntrack_proto_pernet_init(net); - ret = nf_conntrack_proto_pernet_init(net); - if (ret < 0) - goto err_proto; return 0; -err_proto: - nf_conntrack_ecache_pernet_fini(net); - nf_conntrack_expect_pernet_fini(net); err_expect: free_percpu(net->ct.stat); err_pcpu_lists: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 3034038bfdf0..334d6e5b7762 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -610,7 +610,7 @@ static int exp_seq_show(struct seq_file *s, void *v) expect->tuple.src.l3num, expect->tuple.dst.protonum); print_tuple(s, &expect->tuple, - __nf_ct_l4proto_find(expect->tuple.dst.protonum)); + nf_ct_l4proto_find(expect->tuple.dst.protonum)); if (expect->flags & NF_CT_EXPECT_PERMANENT) { seq_puts(s, "PERMANENT"); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1213beb5a714..8071bb04a849 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -134,7 +134,7 @@ static int ctnetlink_dump_tuples(struct sk_buff *skb, ret = ctnetlink_dump_tuples_ip(skb, tuple); if (ret >= 0) { - l4proto = __nf_ct_l4proto_find(tuple->dst.protonum); + l4proto = nf_ct_l4proto_find(tuple->dst.protonum); ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto); } rcu_read_unlock(); @@ -182,7 +182,7 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) struct nlattr *nest_proto; int ret; - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (!l4proto->to_nlattr) return 0; @@ -590,7 +590,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct) len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1); len *= 3u; /* ORIG, REPLY, MASTER */ - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); len += l4proto->nlattr_size; if (l4proto->nlattr_tuple_size) { len4 = l4proto->nlattr_tuple_size(); @@ -1059,7 +1059,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr, tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]); rcu_read_lock(); - l4proto = __nf_ct_l4proto_find(tuple->dst.protonum); + l4proto = nf_ct_l4proto_find(tuple->dst.protonum); if (likely(l4proto->nlattr_to_tuple)) { ret = nla_validate_nested(attr, CTA_PROTO_MAX, @@ -1722,11 +1722,9 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct, if (err < 0) return err; - rcu_read_lock(); - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (l4proto->from_nlattr) err = l4proto->from_nlattr(tb, ct); - rcu_read_unlock(); return err; } @@ -2676,7 +2674,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb, rcu_read_lock(); ret = ctnetlink_dump_tuples_ip(skb, &m); if (ret >= 0) { - l4proto = __nf_ct_l4proto_find(tuple->dst.protonum); + l4proto = nf_ct_l4proto_find(tuple->dst.protonum); ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); } rcu_read_unlock(); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 11562f2a08bb..976f1dcb97f0 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -121,7 +121,7 @@ static void pptp_expectfn(struct nf_conn *ct, struct nf_conntrack_expect *exp_other; /* obviously this tuple inversion only works until you do NAT */ - nf_ct_invert_tuplepr(&inv_t, &exp->tuple); + nf_ct_invert_tuple(&inv_t, &exp->tuple); pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 859f5d07a915..b9403a266a2e 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -43,40 +43,9 @@ extern unsigned int nf_conntrack_net_id; -static struct nf_conntrack_l4proto __rcu *nf_ct_protos[MAX_NF_CT_PROTO + 1] __read_mostly; - static DEFINE_MUTEX(nf_ct_proto_mutex); #ifdef CONFIG_SYSCTL -static int -nf_ct_register_sysctl(struct net *net, - struct ctl_table_header **header, - const char *path, - struct ctl_table *table) -{ - if (*header == NULL) { - *header = register_net_sysctl(net, path, table); - if (*header == NULL) - return -ENOMEM; - } - - return 0; -} - -static void -nf_ct_unregister_sysctl(struct ctl_table_header **header, - struct ctl_table **table, - unsigned int users) -{ - if (users > 0) - return; - - unregister_net_sysctl_table(*header); - kfree(*table); - *header = NULL; - *table = NULL; -} - __printf(5, 6) void nf_l4proto_log_invalid(const struct sk_buff *skb, struct net *net, @@ -124,295 +93,82 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid); #endif -const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto) +const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto) { - if (unlikely(l4proto >= ARRAY_SIZE(nf_ct_protos))) - return &nf_conntrack_l4proto_generic; - - return rcu_dereference(nf_ct_protos[l4proto]); -} -EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find); - -const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4num) -{ - const struct nf_conntrack_l4proto *p; - - rcu_read_lock(); - p = __nf_ct_l4proto_find(l4num); - if (!try_module_get(p->me)) - p = &nf_conntrack_l4proto_generic; - rcu_read_unlock(); - - return p; -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get); - -void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p) -{ - module_put(p->me); -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_put); - -static int kill_l4proto(struct nf_conn *i, void *data) -{ - const struct nf_conntrack_l4proto *l4proto; - l4proto = data; - return nf_ct_protonum(i) == l4proto->l4proto; -} - -static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, - const struct nf_conntrack_l4proto *l4proto) -{ - if (l4proto->get_net_proto) { - /* statically built-in protocols use static per-net */ - return l4proto->get_net_proto(net); - } else if (l4proto->net_id) { - /* ... and loadable protocols use dynamic per-net */ - return net_generic(net, *l4proto->net_id); - } - return NULL; -} - -static -int nf_ct_l4proto_register_sysctl(struct net *net, - struct nf_proto_net *pn) -{ - int err = 0; - -#ifdef CONFIG_SYSCTL - if (pn->ctl_table != NULL) { - err = nf_ct_register_sysctl(net, - &pn->ctl_table_header, - "net/netfilter", - pn->ctl_table); - if (err < 0) { - if (!pn->users) { - kfree(pn->ctl_table); - pn->ctl_table = NULL; - } - } - } -#endif /* CONFIG_SYSCTL */ - return err; -} - -static -void nf_ct_l4proto_unregister_sysctl(struct nf_proto_net *pn) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table_header != NULL) - nf_ct_unregister_sysctl(&pn->ctl_table_header, - &pn->ctl_table, - pn->users); -#endif /* CONFIG_SYSCTL */ -} - -/* FIXME: Allow NULL functions and sub in pointers to generic for - them. --RR */ -int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto) -{ - int ret = 0; - - if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) || - (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size)) - return -EINVAL; - - mutex_lock(&nf_ct_proto_mutex); - if (rcu_dereference_protected( - nf_ct_protos[l4proto->l4proto], - lockdep_is_held(&nf_ct_proto_mutex) - ) != &nf_conntrack_l4proto_generic) { - ret = -EBUSY; - goto out_unlock; + switch (l4proto) { + case IPPROTO_UDP: return &nf_conntrack_l4proto_udp; + case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp; + case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp; +#ifdef CONFIG_NF_CT_PROTO_DCCP + case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp; +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + case IPPROTO_UDPLITE: return &nf_conntrack_l4proto_udplite; +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: return &nf_conntrack_l4proto_gre; +#endif +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: return &nf_conntrack_l4proto_icmpv6; +#endif /* CONFIG_IPV6 */ } - rcu_assign_pointer(nf_ct_protos[l4proto->l4proto], l4proto); -out_unlock: - mutex_unlock(&nf_ct_proto_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one); + return &nf_conntrack_l4proto_generic; +}; +EXPORT_SYMBOL_GPL(nf_ct_l4proto_find); -int nf_ct_l4proto_pernet_register_one(struct net *net, - const struct nf_conntrack_l4proto *l4proto) +static unsigned int nf_confirm(struct sk_buff *skb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) { - int ret = 0; - struct nf_proto_net *pn = NULL; - - if (l4proto->init_net) { - ret = l4proto->init_net(net); - if (ret < 0) - goto out; - } - - pn = nf_ct_l4proto_net(net, l4proto); - if (pn == NULL) - goto out; - - ret = nf_ct_l4proto_register_sysctl(net, pn); - if (ret < 0) - goto out; - - pn->users++; -out: - return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one); - -static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) - -{ - BUG_ON(l4proto->l4proto >= ARRAY_SIZE(nf_ct_protos)); - - BUG_ON(rcu_dereference_protected( - nf_ct_protos[l4proto->l4proto], - lockdep_is_held(&nf_ct_proto_mutex) - ) != l4proto); - rcu_assign_pointer(nf_ct_protos[l4proto->l4proto], - &nf_conntrack_l4proto_generic); -} - -void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) -{ - mutex_lock(&nf_ct_proto_mutex); - __nf_ct_l4proto_unregister_one(l4proto); - mutex_unlock(&nf_ct_proto_mutex); - - synchronize_net(); - /* Remove all contrack entries for this protocol */ - nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto); -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); - -void nf_ct_l4proto_pernet_unregister_one(struct net *net, - const struct nf_conntrack_l4proto *l4proto) -{ - struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); - - if (pn == NULL) - return; - - pn->users--; - nf_ct_l4proto_unregister_sysctl(pn); -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); - -static void -nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[], - unsigned int num_proto) -{ - int i; - - mutex_lock(&nf_ct_proto_mutex); - for (i = 0; i < num_proto; i++) - __nf_ct_l4proto_unregister_one(l4proto[i]); - mutex_unlock(&nf_ct_proto_mutex); - - synchronize_net(); - - for (i = 0; i < num_proto; i++) - nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]); -} - -static int -nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[], - unsigned int num_proto) -{ - int ret = -EINVAL; - unsigned int i; - - for (i = 0; i < num_proto; i++) { - ret = nf_ct_l4proto_register_one(l4proto[i]); - if (ret < 0) - break; - } - if (i != num_proto) { - pr_err("nf_conntrack: can't register l4 %d proto.\n", - l4proto[i]->l4proto); - nf_ct_l4proto_unregister(l4proto, i); - } - return ret; -} - -int nf_ct_l4proto_pernet_register(struct net *net, - const struct nf_conntrack_l4proto *const l4proto[], - unsigned int num_proto) -{ - int ret = -EINVAL; - unsigned int i; - - for (i = 0; i < num_proto; i++) { - ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]); - if (ret < 0) - break; - } - if (i != num_proto) { - pr_err("nf_conntrack %d: pernet registration failed\n", - l4proto[i]->l4proto); - nf_ct_l4proto_pernet_unregister(net, l4proto, i); - } - return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); - -void nf_ct_l4proto_pernet_unregister(struct net *net, - const struct nf_conntrack_l4proto *const l4proto[], - unsigned int num_proto) -{ - while (num_proto-- != 0) - nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]); -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); - -static unsigned int ipv4_helper(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; const struct nf_conn_help *help; - const struct nf_conntrack_helper *helper; - - /* This is where we call the helper: as the packet goes out. */ - ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED_REPLY) - return NF_ACCEPT; help = nfct_help(ct); - if (!help) - return NF_ACCEPT; + if (help) { + const struct nf_conntrack_helper *helper; + int ret; - /* rcu_read_lock()ed by nf_hook_thresh */ - helper = rcu_dereference(help->helper); - if (!helper) - return NF_ACCEPT; + /* rcu_read_lock()ed by nf_hook_thresh */ + helper = rcu_dereference(help->helper); + if (helper) { + ret = helper->help(skb, + protoff, + ct, ctinfo); + if (ret != NF_ACCEPT) + return ret; + } + } - return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), - ct, ctinfo); + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { + if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); + return NF_DROP; + } + } + + /* We've seen it coming out the other side: confirm it */ + return nf_conntrack_confirm(skb); } static unsigned int ipv4_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - struct nf_conn *ct; enum ip_conntrack_info ctinfo; + struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) - goto out; + return nf_conntrack_confirm(skb); - /* adjust seqs for loopback traffic only in outgoing direction */ - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && - !nf_is_loopback_packet(skb)) { - if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { - NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); - return NF_DROP; - } - } -out: - /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(skb); + return nf_confirm(skb, + skb_network_offset(skb) + ip_hdrlen(skb), + ct, ctinfo); } static unsigned int ipv4_conntrack_in(void *priv, @@ -460,24 +216,12 @@ static const struct nf_hook_ops ipv4_conntrack_ops[] = { .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK, }, - { - .hook = ipv4_helper, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_HELPER, - }, { .hook = ipv4_confirm, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM, }, - { - .hook = ipv4_helper, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_CONNTRACK_HELPER, - }, { .hook = ipv4_confirm, .pf = NFPROTO_IPV4, @@ -623,31 +367,21 @@ static unsigned int ipv6_confirm(void *priv, struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned char pnum = ipv6_hdr(skb)->nexthdr; - int protoff; __be16 frag_off; + int protoff; ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) - goto out; + return nf_conntrack_confirm(skb); protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off); if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("proto header not found\n"); - goto out; + return nf_conntrack_confirm(skb); } - /* adjust seqs for loopback traffic only in outgoing direction */ - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && - !nf_is_loopback_packet(skb)) { - if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { - NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); - return NF_DROP; - } - } -out: - /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(skb); + return nf_confirm(skb, protoff, ct, ctinfo); } static unsigned int ipv6_conntrack_in(void *priv, @@ -664,42 +398,6 @@ static unsigned int ipv6_conntrack_local(void *priv, return nf_conntrack_in(skb, state); } -static unsigned int ipv6_helper(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct nf_conn *ct; - const struct nf_conn_help *help; - const struct nf_conntrack_helper *helper; - enum ip_conntrack_info ctinfo; - __be16 frag_off; - int protoff; - u8 nexthdr; - - /* This is where we call the helper: as the packet goes out. */ - ct = nf_ct_get(skb, &ctinfo); - if (!ct || ctinfo == IP_CT_RELATED_REPLY) - return NF_ACCEPT; - - help = nfct_help(ct); - if (!help) - return NF_ACCEPT; - /* rcu_read_lock()ed by nf_hook_thresh */ - helper = rcu_dereference(help->helper); - if (!helper) - return NF_ACCEPT; - - nexthdr = ipv6_hdr(skb)->nexthdr; - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, - &frag_off); - if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { - pr_debug("proto header not found\n"); - return NF_ACCEPT; - } - - return helper->help(skb, protoff, ct, ctinfo); -} - static const struct nf_hook_ops ipv6_conntrack_ops[] = { { .hook = ipv6_conntrack_in, @@ -713,24 +411,12 @@ static const struct nf_hook_ops ipv6_conntrack_ops[] = { .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK, }, - { - .hook = ipv6_helper, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP6_PRI_CONNTRACK_HELPER, - }, { .hook = ipv6_confirm, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_LAST, }, - { - .hook = ipv6_helper, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP6_PRI_CONNTRACK_HELPER, - }, { .hook = ipv6_confirm, .pf = NFPROTO_IPV6, @@ -874,27 +560,9 @@ void nf_ct_netns_put(struct net *net, uint8_t nfproto) } EXPORT_SYMBOL_GPL(nf_ct_netns_put); -static const struct nf_conntrack_l4proto * const builtin_l4proto[] = { - &nf_conntrack_l4proto_tcp, - &nf_conntrack_l4proto_udp, - &nf_conntrack_l4proto_icmp, -#ifdef CONFIG_NF_CT_PROTO_DCCP - &nf_conntrack_l4proto_dccp, -#endif -#ifdef CONFIG_NF_CT_PROTO_SCTP - &nf_conntrack_l4proto_sctp, -#endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - &nf_conntrack_l4proto_udplite, -#endif -#if IS_ENABLED(CONFIG_IPV6) - &nf_conntrack_l4proto_icmpv6, -#endif /* CONFIG_IPV6 */ -}; - int nf_conntrack_proto_init(void) { - int ret = 0, i; + int ret; ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) @@ -906,18 +574,8 @@ int nf_conntrack_proto_init(void) goto cleanup_sockopt; #endif - for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++) - RCU_INIT_POINTER(nf_ct_protos[i], - &nf_conntrack_l4proto_generic); - - ret = nf_ct_l4proto_register(builtin_l4proto, - ARRAY_SIZE(builtin_l4proto)); - if (ret < 0) - goto cleanup_sockopt2; - return ret; -cleanup_sockopt2: - nf_unregister_sockopt(&so_getorigdst); + #if IS_ENABLED(CONFIG_IPV6) cleanup_sockopt: nf_unregister_sockopt(&so_getorigdst6); @@ -933,43 +591,33 @@ void nf_conntrack_proto_fini(void) #endif } -int nf_conntrack_proto_pernet_init(struct net *net) +void nf_conntrack_proto_pernet_init(struct net *net) { - int err; - struct nf_proto_net *pn = nf_ct_l4proto_net(net, - &nf_conntrack_l4proto_generic); - - err = nf_conntrack_l4proto_generic.init_net(net); - if (err < 0) - return err; - err = nf_ct_l4proto_register_sysctl(net, - pn); - if (err < 0) - return err; - - err = nf_ct_l4proto_pernet_register(net, builtin_l4proto, - ARRAY_SIZE(builtin_l4proto)); - if (err < 0) { - nf_ct_l4proto_unregister_sysctl(pn); - return err; - } - - pn->users++; - return 0; + nf_conntrack_generic_init_net(net); + nf_conntrack_udp_init_net(net); + nf_conntrack_tcp_init_net(net); + nf_conntrack_icmp_init_net(net); +#if IS_ENABLED(CONFIG_IPV6) + nf_conntrack_icmpv6_init_net(net); +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP + nf_conntrack_dccp_init_net(net); +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + nf_conntrack_sctp_init_net(net); +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE + nf_conntrack_gre_init_net(net); +#endif } void nf_conntrack_proto_pernet_fini(struct net *net) { - struct nf_proto_net *pn = nf_ct_l4proto_net(net, - &nf_conntrack_l4proto_generic); - - nf_ct_l4proto_pernet_unregister(net, builtin_l4proto, - ARRAY_SIZE(builtin_l4proto)); - pn->users--; - nf_ct_l4proto_unregister_sysctl(pn); +#ifdef CONFIG_NF_CT_PROTO_GRE + nf_ct_gre_keymap_flush(net); +#endif } - module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 023c1445bc39..6fca80587505 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -472,9 +472,10 @@ out_invalid: return true; } -static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; @@ -723,123 +724,28 @@ dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -/* template, data assigned later */ -static struct ctl_table dccp_sysctl_table[] = { - { - .procname = "nf_conntrack_dccp_timeout_request", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_timeout_respond", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_timeout_partopen", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_timeout_open", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_timeout_closereq", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_timeout_closing", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_timeout_timewait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_dccp_loose", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, - struct nf_dccp_net *dn) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table) - return 0; - - pn->ctl_table = kmemdup(dccp_sysctl_table, - sizeof(dccp_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST]; - pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND]; - pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN]; - pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN]; - pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ]; - pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING]; - pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT]; - pn->ctl_table[7].data = &dn->dccp_loose; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - pn->ctl_table[0].procname = NULL; -#endif - return 0; -} - -static int dccp_init_net(struct net *net) +void nf_conntrack_dccp_init_net(struct net *net) { struct nf_dccp_net *dn = nf_dccp_pernet(net); - struct nf_proto_net *pn = &dn->pn; - if (!pn->users) { - /* default values */ - dn->dccp_loose = 1; - dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; - dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL; - dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL; - dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ; - dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; - dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; - dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; + /* default values */ + dn->dccp_loose = 1; + dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; + dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL; + dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL; + dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ; + dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; + dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; + dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; - /* timeouts[0] is unused, make it same as SYN_SENT so - * ->timeouts[0] contains 'new' timeout, like udp or icmp. - */ - dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST]; - } - - return dccp_kmemdup_sysctl_table(net, pn, dn); -} - -static struct nf_proto_net *dccp_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.dccp.pn; + /* timeouts[0] is unused, make it same as SYN_SENT so + * ->timeouts[0] contains 'new' timeout, like udp or icmp. + */ + dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST]; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = { .l4proto = IPPROTO_DCCP, - .packet = dccp_packet, .can_early_drop = dccp_can_early_drop, #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = dccp_print_conntrack, @@ -862,6 +768,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = { .nla_policy = dccp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = dccp_init_net, - .get_net_proto = dccp_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 5da19d5fbc76..0f526fafecae 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -15,50 +15,6 @@ static const unsigned int nf_ct_generic_timeout = 600*HZ; -static bool nf_generic_should_process(u8 proto) -{ - switch (proto) { -#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE - case IPPROTO_GRE: - return false; -#endif - default: - return true; - } -} - -static bool generic_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct net *net, struct nf_conntrack_tuple *tuple) -{ - tuple->src.u.all = 0; - tuple->dst.u.all = 0; - - return true; -} - -/* Returns verdict for packet, or -1 for invalid. */ -static int generic_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) -{ - const unsigned int *timeout = nf_ct_timeout_lookup(ct); - - if (!nf_generic_should_process(nf_ct_protonum(ct))) { - pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n", - nf_ct_protonum(ct)); - return -NF_ACCEPT; - } - - if (!timeout) - timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout; - - nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); - return NF_ACCEPT; -} - #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include @@ -104,53 +60,16 @@ generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -static struct ctl_table generic_sysctl_table[] = { - { - .procname = "nf_conntrack_generic_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_generic_net *gn) -{ -#ifdef CONFIG_SYSCTL - pn->ctl_table = kmemdup(generic_sysctl_table, - sizeof(generic_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &gn->timeout; -#endif - return 0; -} - -static int generic_init_net(struct net *net) +void nf_conntrack_generic_init_net(struct net *net) { struct nf_generic_net *gn = nf_generic_pernet(net); - struct nf_proto_net *pn = &gn->pn; gn->timeout = nf_ct_generic_timeout; - - return generic_kmemdup_sysctl_table(pn, gn); -} - -static struct nf_proto_net *generic_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.generic.pn; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = { .l4proto = 255, - .pkt_to_tuple = generic_pkt_to_tuple, - .packet = generic_packet, #ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = generic_timeout_nlattr_to_obj, @@ -160,6 +79,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = .nla_policy = generic_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = generic_init_net, - .get_net_proto = generic_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 8899b51aad44..ee9ab10a32e4 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -48,24 +48,25 @@ static const unsigned int gre_timeouts[GRE_CT_MAX] = { [GRE_CT_REPLIED] = 180*HZ, }; -static unsigned int proto_gre_net_id __read_mostly; +/* used when expectation is added */ +static DEFINE_SPINLOCK(keymap_lock); -static inline struct netns_proto_gre *gre_pernet(struct net *net) +static inline struct nf_gre_net *gre_pernet(struct net *net) { - return net_generic(net, proto_gre_net_id); + return &net->ct.nf_ct_proto.gre; } -static void nf_ct_gre_keymap_flush(struct net *net) +void nf_ct_gre_keymap_flush(struct net *net) { - struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_gre_keymap *km, *tmp; - write_lock_bh(&net_gre->keymap_lock); + spin_lock_bh(&keymap_lock); list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) { - list_del(&km->list); - kfree(km); + list_del_rcu(&km->list); + kfree_rcu(km, rcu); } - write_unlock_bh(&net_gre->keymap_lock); + spin_unlock_bh(&keymap_lock); } static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, @@ -81,18 +82,16 @@ static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, /* look up the source key for a given tuple */ static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) { - struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_gre_keymap *km; __be16 key = 0; - read_lock_bh(&net_gre->keymap_lock); - list_for_each_entry(km, &net_gre->keymap_list, list) { + list_for_each_entry_rcu(km, &net_gre->keymap_list, list) { if (gre_key_cmpfn(km, t)) { key = km->tuple.src.u.gre.key; break; } } - read_unlock_bh(&net_gre->keymap_lock); pr_debug("lookup src key 0x%x for ", key); nf_ct_dump_tuple(t); @@ -105,21 +104,17 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conntrack_tuple *t) { struct net *net = nf_ct_net(ct); - struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); struct nf_ct_gre_keymap **kmp, *km; kmp = &ct_pptp_info->keymap[dir]; if (*kmp) { /* check whether it's a retransmission */ - read_lock_bh(&net_gre->keymap_lock); - list_for_each_entry(km, &net_gre->keymap_list, list) { - if (gre_key_cmpfn(km, t) && km == *kmp) { - read_unlock_bh(&net_gre->keymap_lock); + list_for_each_entry_rcu(km, &net_gre->keymap_list, list) { + if (gre_key_cmpfn(km, t) && km == *kmp) return 0; - } } - read_unlock_bh(&net_gre->keymap_lock); pr_debug("trying to override keymap_%s for ct %p\n", dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); return -EEXIST; @@ -134,9 +129,9 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, pr_debug("adding new entry %p: ", km); nf_ct_dump_tuple(&km->tuple); - write_lock_bh(&net_gre->keymap_lock); + spin_lock_bh(&keymap_lock); list_add_tail(&km->list, &net_gre->keymap_list); - write_unlock_bh(&net_gre->keymap_lock); + spin_unlock_bh(&keymap_lock); return 0; } @@ -145,32 +140,30 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); /* destroy the keymap entries associated with specified master ct */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) { - struct net *net = nf_ct_net(ct); - struct netns_proto_gre *net_gre = gre_pernet(net); struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); enum ip_conntrack_dir dir; pr_debug("entering for ct %p\n", ct); - write_lock_bh(&net_gre->keymap_lock); + spin_lock_bh(&keymap_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { if (ct_pptp_info->keymap[dir]) { pr_debug("removing %p from list\n", ct_pptp_info->keymap[dir]); - list_del(&ct_pptp_info->keymap[dir]->list); - kfree(ct_pptp_info->keymap[dir]); + list_del_rcu(&ct_pptp_info->keymap[dir]->list); + kfree_rcu(ct_pptp_info->keymap[dir], rcu); ct_pptp_info->keymap[dir] = NULL; } } - write_unlock_bh(&net_gre->keymap_lock); + spin_unlock_bh(&keymap_lock); } EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy); /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ /* gre hdr info to tuple */ -static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct net *net, struct nf_conntrack_tuple *tuple) +bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct pptp_gre_header *pgrehdr; struct pptp_gre_header _pgrehdr; @@ -216,15 +209,15 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) static unsigned int *gre_get_timeouts(struct net *net) { - return gre_pernet(net)->gre_timeouts; + return gre_pernet(net)->timeouts; } /* Returns verdict for packet, and may modify conntrack */ -static int gre_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_gre_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { if (state->pf != NFPROTO_IPV4) return -NF_ACCEPT; @@ -256,19 +249,6 @@ static int gre_packet(struct nf_conn *ct, return NF_ACCEPT; } -/* Called when a conntrack entry has already been removed from the hashes - * and is about to be deleted from memory */ -static void gre_destroy(struct nf_conn *ct) -{ - struct nf_conn *master = ct->master; - pr_debug(" entering\n"); - - if (!master) - pr_debug("no master !?!\n"); - else - nf_ct_gre_keymap_destroy(master); -} - #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include @@ -278,13 +258,13 @@ static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct netns_proto_gre *net_gre = gre_pernet(net); + struct nf_gre_net *net_gre = gre_pernet(net); if (!timeouts) timeouts = gre_get_timeouts(net); /* set default timeouts for GRE. */ - timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED]; - timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED]; + timeouts[GRE_CT_UNREPLIED] = net_gre->timeouts[GRE_CT_UNREPLIED]; + timeouts[GRE_CT_REPLIED] = net_gre->timeouts[GRE_CT_REPLIED]; if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) { timeouts[GRE_CT_UNREPLIED] = @@ -320,69 +300,22 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -static struct ctl_table gre_sysctl_table[] = { - { - .procname = "nf_conntrack_gre_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_gre_timeout_stream", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - {} -}; -#endif - -static int gre_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *nf, - struct netns_proto_gre *net_gre) +void nf_conntrack_gre_init_net(struct net *net) { -#ifdef CONFIG_SYSCTL + struct nf_gre_net *net_gre = gre_pernet(net); int i; - if (nf->ctl_table) - return 0; - - nf->ctl_table = kmemdup(gre_sysctl_table, - sizeof(gre_sysctl_table), - GFP_KERNEL); - if (!nf->ctl_table) - return -ENOMEM; - - for (i = 0; i < GRE_CT_MAX; i++) - nf->ctl_table[i].data = &net_gre->gre_timeouts[i]; -#endif - return 0; -} - -static int gre_init_net(struct net *net) -{ - struct netns_proto_gre *net_gre = gre_pernet(net); - struct nf_proto_net *nf = &net_gre->nf; - int i; - - rwlock_init(&net_gre->keymap_lock); INIT_LIST_HEAD(&net_gre->keymap_list); for (i = 0; i < GRE_CT_MAX; i++) - net_gre->gre_timeouts[i] = gre_timeouts[i]; - - return gre_kmemdup_sysctl_table(net, nf, net_gre); + net_gre->timeouts[i] = gre_timeouts[i]; } /* protocol helper struct */ -static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { +const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = { .l4proto = IPPROTO_GRE, - .pkt_to_tuple = gre_pkt_to_tuple, #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = gre_print_conntrack, #endif - .packet = gre_packet, - .destroy = gre_destroy, - .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, @@ -398,61 +331,4 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { .nla_policy = gre_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .net_id = &proto_gre_net_id, - .init_net = gre_init_net, }; - -static int proto_gre_net_init(struct net *net) -{ - int ret = 0; - - ret = nf_ct_l4proto_pernet_register_one(net, - &nf_conntrack_l4proto_gre4); - if (ret < 0) - pr_err("nf_conntrack_gre4: pernet registration failed.\n"); - return ret; -} - -static void proto_gre_net_exit(struct net *net) -{ - nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4); - nf_ct_gre_keymap_flush(net); -} - -static struct pernet_operations proto_gre_net_ops = { - .init = proto_gre_net_init, - .exit = proto_gre_net_exit, - .id = &proto_gre_net_id, - .size = sizeof(struct netns_proto_gre), -}; - -static int __init nf_ct_proto_gre_init(void) -{ - int ret; - - BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0); - - ret = register_pernet_subsys(&proto_gre_net_ops); - if (ret < 0) - goto out_pernet; - ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4); - if (ret < 0) - goto out_gre4; - - return 0; -out_gre4: - unregister_pernet_subsys(&proto_gre_net_ops); -out_pernet: - return ret; -} - -static void __exit nf_ct_proto_gre_fini(void) -{ - nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4); - unregister_pernet_subsys(&proto_gre_net_ops); -} - -module_init(nf_ct_proto_gre_init); -module_exit(nf_ct_proto_gre_fini); - -MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c index de64d8a5fdfd..7df477996b16 100644 --- a/net/netfilter/nf_conntrack_proto_icmp.c +++ b/net/netfilter/nf_conntrack_proto_icmp.c @@ -25,8 +25,8 @@ static const unsigned int nf_ct_icmp_timeout = 30*HZ; -static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct net *net, struct nf_conntrack_tuple *tuple) +bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct icmphdr *hp; struct icmphdr _hdr; @@ -54,8 +54,8 @@ static const u_int8_t invmap[] = { [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 }; -static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *orig) +bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) { if (orig->dst.u.icmp.type >= sizeof(invmap) || !invmap[orig->dst.u.icmp.type]) @@ -68,11 +68,10 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, } /* Returns verdict for packet, or -1 for invalid. */ -static int icmp_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_icmp_packet(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { /* Do not immediately delete the connection after the first successful reply to avoid excessive conntrackd traffic @@ -110,7 +109,6 @@ icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_conntrack_tuple innertuple, origtuple; - const struct nf_conntrack_l4proto *innerproto; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_zone *zone; enum ip_conntrack_info ctinfo; @@ -128,12 +126,9 @@ icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb, return -NF_ACCEPT; } - /* rcu_read_lock()ed by nf_hook_thresh */ - innerproto = __nf_ct_l4proto_find(origtuple.dst.protonum); - /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. */ - if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { + if (!nf_ct_invert_tuple(&innertuple, &origtuple)) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; } @@ -303,56 +298,16 @@ icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -static struct ctl_table icmp_sysctl_table[] = { - { - .procname = "nf_conntrack_icmp_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_icmp_net *in) -{ -#ifdef CONFIG_SYSCTL - pn->ctl_table = kmemdup(icmp_sysctl_table, - sizeof(icmp_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &in->timeout; -#endif - return 0; -} - -static int icmp_init_net(struct net *net) +void nf_conntrack_icmp_init_net(struct net *net) { struct nf_icmp_net *in = nf_icmp_pernet(net); - struct nf_proto_net *pn = &in->pn; in->timeout = nf_ct_icmp_timeout; - - return icmp_kmemdup_sysctl_table(pn, in); -} - -static struct nf_proto_net *icmp_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.icmp.pn; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = { .l4proto = IPPROTO_ICMP, - .pkt_to_tuple = icmp_pkt_to_tuple, - .invert_tuple = icmp_invert_tuple, - .packet = icmp_packet, - .destroy = NULL, - .me = NULL, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmp_tuple_to_nlattr, .nlattr_tuple_size = icmp_nlattr_tuple_size, @@ -368,6 +323,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = .nla_policy = icmp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = icmp_init_net, - .get_net_proto = icmp_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index a15eefb8e317..bec4a3211658 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -30,10 +30,10 @@ static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; -static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct net *net, - struct nf_conntrack_tuple *tuple) +bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct net *net, + struct nf_conntrack_tuple *tuple) { const struct icmp6hdr *hp; struct icmp6hdr _hdr; @@ -67,8 +67,8 @@ static const u_int8_t noct_valid_new[] = { [ICMPV6_MLD2_REPORT - 130] = 1 }; -static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *orig) +bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) { int type = orig->dst.u.icmp.type - 128; if (type < 0 || type >= sizeof(invmap) || !invmap[type]) @@ -86,11 +86,10 @@ static unsigned int *icmpv6_get_timeouts(struct net *net) } /* Returns verdict for packet, or -1 for invalid. */ -static int icmpv6_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_icmpv6_packet(struct nf_conn *ct, + struct sk_buff *skb, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { unsigned int *timeout = nf_ct_timeout_lookup(ct); static const u8 valid_new[] = { @@ -131,7 +130,6 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, { struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; - const struct nf_conntrack_l4proto *inproto; enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; @@ -147,12 +145,9 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } - /* rcu_read_lock()ed by nf_hook_thresh */ - inproto = __nf_ct_l4proto_find(origtuple.dst.protonum); - /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. */ - if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) { + if (!nf_ct_invert_tuple(&intuple, &origtuple)) { pr_debug("icmpv6_error: Can't invert tuple\n"); return -NF_ACCEPT; } @@ -314,54 +309,16 @@ icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -static struct ctl_table icmpv6_sysctl_table[] = { - { - .procname = "nf_conntrack_icmpv6_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_icmp_net *in) -{ -#ifdef CONFIG_SYSCTL - pn->ctl_table = kmemdup(icmpv6_sysctl_table, - sizeof(icmpv6_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &in->timeout; -#endif - return 0; -} - -static int icmpv6_init_net(struct net *net) +void nf_conntrack_icmpv6_init_net(struct net *net) { struct nf_icmp_net *in = nf_icmpv6_pernet(net); - struct nf_proto_net *pn = &in->pn; in->timeout = nf_ct_icmpv6_timeout; - - return icmpv6_kmemdup_sysctl_table(pn, in); -} - -static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.icmpv6.pn; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = { .l4proto = IPPROTO_ICMPV6, - .pkt_to_tuple = icmpv6_pkt_to_tuple, - .invert_tuple = icmpv6_invert_tuple, - .packet = icmpv6_packet, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmpv6_tuple_to_nlattr, .nlattr_tuple_size = icmpv6_nlattr_tuple_size, @@ -377,6 +334,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = .nla_policy = icmpv6_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = icmpv6_init_net, - .get_net_proto = icmpv6_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index d53e3e78f605..a7818101ad80 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -357,11 +357,11 @@ out_invalid: } /* Returns verdict for packet, or -NF_ACCEPT for invalid. */ -static int sctp_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_sctp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { enum sctp_conntrack new_state, old_state; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); @@ -642,116 +642,18 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - -#ifdef CONFIG_SYSCTL -static struct ctl_table sctp_sysctl_table[] = { - { - .procname = "nf_conntrack_sctp_timeout_closed", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_cookie_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_cookie_echoed", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_established", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_shutdown_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_shutdown_recd", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_heartbeat_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif - -static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_sctp_net *sn) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table) - return 0; - - pn->ctl_table = kmemdup(sctp_sysctl_table, - sizeof(sctp_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED]; - pn->ctl_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT]; - pn->ctl_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED]; - pn->ctl_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED]; - pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT]; - pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD]; - pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]; - pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT]; - pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED]; -#endif - return 0; -} - -static int sctp_init_net(struct net *net) +void nf_conntrack_sctp_init_net(struct net *net) { struct nf_sctp_net *sn = nf_sctp_pernet(net); - struct nf_proto_net *pn = &sn->pn; + int i; - if (!pn->users) { - int i; + for (i = 0; i < SCTP_CONNTRACK_MAX; i++) + sn->timeouts[i] = sctp_timeouts[i]; - for (i = 0; i < SCTP_CONNTRACK_MAX; i++) - sn->timeouts[i] = sctp_timeouts[i]; - - /* timeouts[0] is unused, init it so ->timeouts[0] contains - * 'new' timeout, like udp or icmp. - */ - sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED]; - } - - return sctp_kmemdup_sysctl_table(pn, sn); -} - -static struct nf_proto_net *sctp_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.sctp.pn; + /* timeouts[0] is unused, init it so ->timeouts[0] contains + * 'new' timeout, like udp or icmp. + */ + sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED]; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = { @@ -759,9 +661,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = { #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = sctp_print_conntrack, #endif - .packet = sctp_packet, .can_early_drop = sctp_can_early_drop, - .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_size = SCTP_NLATTR_SIZE, .to_nlattr = sctp_to_nlattr, @@ -780,6 +680,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = { .nla_policy = sctp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = sctp_init_net, - .get_net_proto = sctp_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4dcbd51a8e97..01c748fa8913 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -829,11 +829,11 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, } /* Returns verdict for packet, or -1 for invalid. */ -static int tcp_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_tcp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { struct net *net = nf_ct_net(ct); struct nf_tcp_net *tn = nf_tcp_pernet(net); @@ -1387,146 +1387,21 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -static struct ctl_table tcp_sysctl_table[] = { - { - .procname = "nf_conntrack_tcp_timeout_syn_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_syn_recv", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_established", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_fin_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_close_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_last_ack", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_time_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_close", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_max_retrans", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_timeout_unacknowledged", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_tcp_loose", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "nf_conntrack_tcp_be_liberal", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "nf_conntrack_tcp_max_retrans", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_tcp_net *tn) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table) - return 0; - - pn->ctl_table = kmemdup(tcp_sysctl_table, - sizeof(tcp_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - - pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; - pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; - pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; - pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; - pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; - pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; - pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; - pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; - pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; - pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK]; - pn->ctl_table[10].data = &tn->tcp_loose; - pn->ctl_table[11].data = &tn->tcp_be_liberal; - pn->ctl_table[12].data = &tn->tcp_max_retrans; -#endif - return 0; -} - -static int tcp_init_net(struct net *net) +void nf_conntrack_tcp_init_net(struct net *net) { struct nf_tcp_net *tn = nf_tcp_pernet(net); - struct nf_proto_net *pn = &tn->pn; + int i; - if (!pn->users) { - int i; + for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) + tn->timeouts[i] = tcp_timeouts[i]; - for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) - tn->timeouts[i] = tcp_timeouts[i]; - - /* timeouts[0] is unused, make it same as SYN_SENT so - * ->timeouts[0] contains 'new' timeout, like udp or icmp. - */ - tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT]; - tn->tcp_loose = nf_ct_tcp_loose; - tn->tcp_be_liberal = nf_ct_tcp_be_liberal; - tn->tcp_max_retrans = nf_ct_tcp_max_retrans; - } - - return tcp_kmemdup_sysctl_table(pn, tn); -} - -static struct nf_proto_net *tcp_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.tcp.pn; + /* timeouts[0] is unused, make it same as SYN_SENT so + * ->timeouts[0] contains 'new' timeout, like udp or icmp. + */ + tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT]; + tn->tcp_loose = nf_ct_tcp_loose; + tn->tcp_be_liberal = nf_ct_tcp_be_liberal; + tn->tcp_max_retrans = nf_ct_tcp_max_retrans; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp = @@ -1535,7 +1410,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp = #ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = tcp_print_conntrack, #endif - .packet = tcp_packet, .can_early_drop = tcp_can_early_drop, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = tcp_to_nlattr, @@ -1556,6 +1430,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp = .nla_policy = tcp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = tcp_init_net, - .get_net_proto = tcp_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index b4f5d5e82031..951366dfbec3 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -85,11 +85,11 @@ static bool udp_error(struct sk_buff *skb, } /* Returns verdict for packet, and may modify conntracktype */ -static int udp_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_udp_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { unsigned int *timeouts; @@ -177,11 +177,11 @@ static bool udplite_error(struct sk_buff *skb, } /* Returns verdict for packet, and may modify conntracktype */ -static int udplite_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) +int nf_conntrack_udplite_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) { unsigned int *timeouts; @@ -260,66 +260,19 @@ udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = { }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -#ifdef CONFIG_SYSCTL -static struct ctl_table udp_sysctl_table[] = { - { - .procname = "nf_conntrack_udp_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "nf_conntrack_udp_timeout_stream", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_SYSCTL */ - -static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct nf_udp_net *un) -{ -#ifdef CONFIG_SYSCTL - if (pn->ctl_table) - return 0; - pn->ctl_table = kmemdup(udp_sysctl_table, - sizeof(udp_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_table) - return -ENOMEM; - pn->ctl_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; - pn->ctl_table[1].data = &un->timeouts[UDP_CT_REPLIED]; -#endif - return 0; -} - -static int udp_init_net(struct net *net) +void nf_conntrack_udp_init_net(struct net *net) { struct nf_udp_net *un = nf_udp_pernet(net); - struct nf_proto_net *pn = &un->pn; + int i; - if (!pn->users) { - int i; - - for (i = 0; i < UDP_CT_MAX; i++) - un->timeouts[i] = udp_timeouts[i]; - } - - return udp_kmemdup_sysctl_table(pn, un); -} - -static struct nf_proto_net *udp_get_net_proto(struct net *net) -{ - return &net->ct.nf_ct_proto.udp.pn; + for (i = 0; i < UDP_CT_MAX; i++) + un->timeouts[i] = udp_timeouts[i]; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp = { .l4proto = IPPROTO_UDP, .allow_clash = true, - .packet = udp_packet, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, @@ -335,8 +288,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp = .nla_policy = udp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = udp_init_net, - .get_net_proto = udp_get_net_proto, }; #ifdef CONFIG_NF_CT_PROTO_UDPLITE @@ -344,7 +295,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite = { .l4proto = IPPROTO_UDPLITE, .allow_clash = true, - .packet = udplite_packet, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, @@ -360,7 +310,5 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite = .nla_policy = udp_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - .init_net = udp_init_net, - .get_net_proto = udp_get_net_proto, }; #endif diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index b6177fd73304..c2ae14c720b4 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -24,6 +24,10 @@ #include #include +static bool enable_hooks __read_mostly; +MODULE_PARM_DESC(enable_hooks, "Always enable conntrack hooks"); +module_param(enable_hooks, bool, 0000); + unsigned int nf_conntrack_net_id __read_mostly; #ifdef CONFIG_NF_CONNTRACK_PROCFS @@ -310,8 +314,7 @@ static int ct_seq_show(struct seq_file *s, void *v) if (!net_eq(nf_ct_net(ct), net)) goto release; - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); - WARN_ON(!l4proto); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); ret = -ENOSPC; seq_printf(s, "%-8s %u %-8s %u ", @@ -547,8 +550,55 @@ enum nf_ct_sysctl_index { #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP NF_SYSCTL_CT_TIMESTAMP, #endif + NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ESTABLISHED, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_FIN_WAIT, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE_WAIT, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_LAST_ACK, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_TIME_WAIT, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS, + NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK, + NF_SYSCTL_CT_PROTO_TCP_LOOSE, + NF_SYSCTL_CT_PROTO_TCP_LIBERAL, + NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS, + NF_SYSCTL_CT_PROTO_TIMEOUT_UDP, + NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM, + NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP, + NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6, +#ifdef CONFIG_NF_CT_PROTO_SCTP + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_CLOSED, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_WAIT, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_ECHOED, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ESTABLISHED, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_SENT, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT, + NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED, +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST, + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND, + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN, + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN, + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ, + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING, + NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT, + NF_SYSCTL_CT_PROTO_DCCP_LOOSE, +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE + NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, + NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, +#endif + + __NF_SYSCTL_CT_LAST_SYSCTL, }; +#define NF_SYSCTL_CT_LAST_SYSCTL (__NF_SYSCTL_CT_LAST_SYSCTL + 1) + static struct ctl_table nf_ct_sysctl_table[] = { [NF_SYSCTL_CT_MAX] = { .procname = "nf_conntrack_max", @@ -626,7 +676,235 @@ static struct ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, #endif - { } + [NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = { + .procname = "nf_conntrack_generic_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT] = { + .procname = "nf_conntrack_tcp_timeout_syn_sent", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV] = { + .procname = "nf_conntrack_tcp_timeout_syn_recv", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ESTABLISHED] = { + .procname = "nf_conntrack_tcp_timeout_established", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_FIN_WAIT] = { + .procname = "nf_conntrack_tcp_timeout_fin_wait", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE_WAIT] = { + .procname = "nf_conntrack_tcp_timeout_close_wait", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_LAST_ACK] = { + .procname = "nf_conntrack_tcp_timeout_last_ack", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_TIME_WAIT] = { + .procname = "nf_conntrack_tcp_timeout_time_wait", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE] = { + .procname = "nf_conntrack_tcp_timeout_close", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS] = { + .procname = "nf_conntrack_tcp_timeout_max_retrans", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK] = { + .procname = "nf_conntrack_tcp_timeout_unacknowledged", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = { + .procname = "nf_conntrack_tcp_loose", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + [NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = { + .procname = "nf_conntrack_tcp_be_liberal", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { + .procname = "nf_conntrack_tcp_max_retrans", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP] = { + .procname = "nf_conntrack_udp_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM] = { + .procname = "nf_conntrack_udp_timeout_stream", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = { + .procname = "nf_conntrack_icmp_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6] = { + .procname = "nf_conntrack_icmpv6_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, +#ifdef CONFIG_NF_CT_PROTO_SCTP + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_CLOSED] = { + .procname = "nf_conntrack_sctp_timeout_closed", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_WAIT] = { + .procname = "nf_conntrack_sctp_timeout_cookie_wait", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_ECHOED] = { + .procname = "nf_conntrack_sctp_timeout_cookie_echoed", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ESTABLISHED] = { + .procname = "nf_conntrack_sctp_timeout_established", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_SENT] = { + .procname = "nf_conntrack_sctp_timeout_shutdown_sent", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD] = { + .procname = "nf_conntrack_sctp_timeout_shutdown_recd", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { + .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT] = { + .procname = "nf_conntrack_sctp_timeout_heartbeat_sent", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { + .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = { + .procname = "nf_conntrack_dccp_timeout_request", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND] = { + .procname = "nf_conntrack_dccp_timeout_respond", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN] = { + .procname = "nf_conntrack_dccp_timeout_partopen", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN] = { + .procname = "nf_conntrack_dccp_timeout_open", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ] = { + .procname = "nf_conntrack_dccp_timeout_closereq", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING] = { + .procname = "nf_conntrack_dccp_timeout_closing", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT] = { + .procname = "nf_conntrack_dccp_timeout_timewait", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = { + .procname = "nf_conntrack_dccp_loose", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE + [NF_SYSCTL_CT_PROTO_TIMEOUT_GRE] = { + .procname = "nf_conntrack_gre_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + [NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM] = { + .procname = "nf_conntrack_gre_timeout_stream", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, +#endif + {} }; static struct ctl_table nf_ct_netfilter_table[] = { @@ -640,14 +918,103 @@ static struct ctl_table nf_ct_netfilter_table[] = { { } }; +static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, + struct ctl_table *table) +{ + struct nf_tcp_net *tn = nf_tcp_pernet(net); + +#define XASSIGN(XNAME, tn) \ + table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ ## XNAME].data = \ + &(tn)->timeouts[TCP_CONNTRACK_ ## XNAME] + + XASSIGN(SYN_SENT, tn); + XASSIGN(SYN_RECV, tn); + XASSIGN(ESTABLISHED, tn); + XASSIGN(FIN_WAIT, tn); + XASSIGN(CLOSE_WAIT, tn); + XASSIGN(LAST_ACK, tn); + XASSIGN(TIME_WAIT, tn); + XASSIGN(CLOSE, tn); + XASSIGN(RETRANS, tn); + XASSIGN(UNACK, tn); +#undef XASSIGN +#define XASSIGN(XNAME, rval) \ + table[NF_SYSCTL_CT_PROTO_TCP_ ## XNAME].data = (rval) + + XASSIGN(LOOSE, &tn->tcp_loose); + XASSIGN(LIBERAL, &tn->tcp_be_liberal); + XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans); +#undef XASSIGN +} + +static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net, + struct ctl_table *table) +{ +#ifdef CONFIG_NF_CT_PROTO_SCTP + struct nf_sctp_net *sn = nf_sctp_pernet(net); + +#define XASSIGN(XNAME, sn) \ + table[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ ## XNAME].data = \ + &(sn)->timeouts[SCTP_CONNTRACK_ ## XNAME] + + XASSIGN(CLOSED, sn); + XASSIGN(COOKIE_WAIT, sn); + XASSIGN(COOKIE_ECHOED, sn); + XASSIGN(ESTABLISHED, sn); + XASSIGN(SHUTDOWN_SENT, sn); + XASSIGN(SHUTDOWN_RECD, sn); + XASSIGN(SHUTDOWN_ACK_SENT, sn); + XASSIGN(HEARTBEAT_SENT, sn); + XASSIGN(HEARTBEAT_ACKED, sn); +#undef XASSIGN +#endif +} + +static void nf_conntrack_standalone_init_dccp_sysctl(struct net *net, + struct ctl_table *table) +{ +#ifdef CONFIG_NF_CT_PROTO_DCCP + struct nf_dccp_net *dn = nf_dccp_pernet(net); + +#define XASSIGN(XNAME, dn) \ + table[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_ ## XNAME].data = \ + &(dn)->dccp_timeout[CT_DCCP_ ## XNAME] + + XASSIGN(REQUEST, dn); + XASSIGN(RESPOND, dn); + XASSIGN(PARTOPEN, dn); + XASSIGN(OPEN, dn); + XASSIGN(CLOSEREQ, dn); + XASSIGN(CLOSING, dn); + XASSIGN(TIMEWAIT, dn); +#undef XASSIGN + + table[NF_SYSCTL_CT_PROTO_DCCP_LOOSE].data = &dn->dccp_loose; +#endif +} + +static void nf_conntrack_standalone_init_gre_sysctl(struct net *net, + struct ctl_table *table) +{ +#ifdef CONFIG_NF_CT_PROTO_GRE + struct nf_gre_net *gn = nf_gre_pernet(net); + + table[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE].data = &gn->timeouts[GRE_CT_UNREPLIED]; + table[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM].data = &gn->timeouts[GRE_CT_REPLIED]; +#endif +} + static int nf_conntrack_standalone_init_sysctl(struct net *net) { + struct nf_udp_net *un = nf_udp_pernet(net); struct ctl_table *table; + BUILD_BUG_ON(ARRAY_SIZE(nf_ct_sysctl_table) != NF_SYSCTL_CT_LAST_SYSCTL); + table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table), GFP_KERNEL); if (!table) - goto out_kmemdup; + return -ENOMEM; table[NF_SYSCTL_CT_COUNT].data = &net->ct.count; table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum; @@ -655,6 +1022,16 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) #ifdef CONFIG_NF_CONNTRACK_EVENTS table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events; #endif + table[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC].data = &nf_generic_pernet(net)->timeout; + table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP].data = &nf_icmp_pernet(net)->timeout; + table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout; + table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED]; + table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED]; + + nf_conntrack_standalone_init_tcp_sysctl(net, table); + nf_conntrack_standalone_init_sctp_sysctl(net, table); + nf_conntrack_standalone_init_dccp_sysctl(net, table); + nf_conntrack_standalone_init_gre_sysctl(net, table); /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) { @@ -680,7 +1057,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) out_unregister_netfilter: kfree(table); -out_kmemdup: return -ENOMEM; } @@ -703,31 +1079,47 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net) } #endif /* CONFIG_SYSCTL */ +static void nf_conntrack_fini_net(struct net *net) +{ + if (enable_hooks) + nf_ct_netns_put(net, NFPROTO_INET); + + nf_conntrack_standalone_fini_proc(net); + nf_conntrack_standalone_fini_sysctl(net); +} + static int nf_conntrack_pernet_init(struct net *net) { int ret; - ret = nf_conntrack_init_net(net); + net->ct.sysctl_checksum = 1; + + ret = nf_conntrack_standalone_init_sysctl(net); if (ret < 0) - goto out_init; + return ret; ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; - net->ct.sysctl_checksum = 1; - net->ct.sysctl_log_invalid = 0; - ret = nf_conntrack_standalone_init_sysctl(net); + ret = nf_conntrack_init_net(net); if (ret < 0) - goto out_sysctl; + goto out_init_net; + + if (enable_hooks) { + ret = nf_ct_netns_get(net, NFPROTO_INET); + if (ret < 0) + goto out_hooks; + } return 0; -out_sysctl: +out_hooks: + nf_conntrack_cleanup_net(net); +out_init_net: nf_conntrack_standalone_fini_proc(net); out_proc: - nf_conntrack_cleanup_net(net); -out_init: + nf_conntrack_standalone_fini_sysctl(net); return ret; } @@ -735,10 +1127,9 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list) { struct net *net; - list_for_each_entry(net, net_exit_list, exit_list) { - nf_conntrack_standalone_fini_sysctl(net); - nf_conntrack_standalone_fini_proc(net); - } + list_for_each_entry(net, net_exit_list, exit_list) + nf_conntrack_fini_net(net); + nf_conntrack_cleanup_net_list(net_exit_list); } diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index fa0844e2a68d..7aabfd4b1e50 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -28,6 +28,7 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, { struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple; + struct dst_entry *other_dst = route->tuple[!dir].dst; struct dst_entry *dst = route->tuple[dir].dst; ft->dir = dir; @@ -50,8 +51,8 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, ft->src_port = ctt->src.u.tcp.port; ft->dst_port = ctt->dst.u.tcp.port; - ft->iifidx = route->tuple[dir].ifindex; - ft->oifidx = route->tuple[!dir].ifindex; + ft->iifidx = other_dst->dev->ifindex; + ft->oifidx = dst->dev->ifindex; ft->dst_cache = dst; } @@ -120,7 +121,7 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct) if (l4num == IPPROTO_TCP) flow_offload_fixup_tcp(&ct->proto.tcp); - l4proto = __nf_ct_l4proto_find(l4num); + l4proto = nf_ct_l4proto_find(l4num); if (!l4proto) return; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index d159e9e7835b..35e61038ae96 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -146,7 +146,7 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple) } /* Is this tuple already taken? (not by us) */ -int +static int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { @@ -158,10 +158,9 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, */ struct nf_conntrack_tuple reply; - nf_ct_invert_tuplepr(&reply, tuple); + nf_ct_invert_tuple(&reply, tuple); return nf_conntrack_tuple_taken(&reply, ignored_conntrack); } -EXPORT_SYMBOL(nf_nat_used_tuple); static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t, const struct nf_nat_range2 *range) @@ -253,7 +252,7 @@ find_appropriate_src(struct net *net, net_eq(net, nf_ct_net(ct)) && nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { /* Copy source part from reply tuple. */ - nf_ct_invert_tuplepr(result, + nf_ct_invert_tuple(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; @@ -560,8 +559,8 @@ nf_nat_setup_info(struct nf_conn *ct, * manipulations (future optimization: if num_manips == 0, * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ - nf_ct_invert_tuplepr(&curr_tuple, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + nf_ct_invert_tuple(&curr_tuple, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); @@ -569,7 +568,7 @@ nf_nat_setup_info(struct nf_conn *ct, struct nf_conntrack_tuple reply; /* Alter conntrack table so will recognize replies. */ - nf_ct_invert_tuplepr(&reply, &new_tuple); + nf_ct_invert_tuple(&reply, &new_tuple); nf_conntrack_alter_reply(ct, &reply); /* Non-atomic: we own this at the moment. */ @@ -640,7 +639,7 @@ static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct, struct nf_conntrack_tuple target; /* We are aiming to look like inverse of other direction. */ - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); l3proto = __nf_nat_l3proto_find(target.src.l3num); if (!l3proto->manip_pkt(skb, 0, &target, mtype)) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2b0a93300dd7..e92bedd09cde 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -37,10 +37,16 @@ enum { NFT_VALIDATE_DO, }; +static struct rhltable nft_objname_ht; + static u32 nft_chain_hash(const void *data, u32 len, u32 seed); static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed); static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *); +static u32 nft_objname_hash(const void *data, u32 len, u32 seed); +static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed); +static int nft_objname_hash_cmp(struct rhashtable_compare_arg *, const void *); + static const struct rhashtable_params nft_chain_ht_params = { .head_offset = offsetof(struct nft_chain, rhlhead), .key_offset = offsetof(struct nft_chain, name), @@ -51,6 +57,15 @@ static const struct rhashtable_params nft_chain_ht_params = { .automatic_shrinking = true, }; +static const struct rhashtable_params nft_objname_ht_params = { + .head_offset = offsetof(struct nft_object, rhlhead), + .key_offset = offsetof(struct nft_object, key), + .hashfn = nft_objname_hash, + .obj_hashfn = nft_objname_hash_obj, + .obj_cmpfn = nft_objname_hash_cmp, + .automatic_shrinking = true, +}; + static void nft_validate_state_update(struct net *net, u8 new_validate_state) { switch (net->nft.validate_state) { @@ -814,6 +829,34 @@ static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg, return strcmp(chain->name, name); } +static u32 nft_objname_hash(const void *data, u32 len, u32 seed) +{ + const struct nft_object_hash_key *k = data; + + seed ^= hash_ptr(k->table, 32); + + return jhash(k->name, strlen(k->name), seed); +} + +static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct nft_object *obj = data; + + return nft_objname_hash(&obj->key, 0, seed); +} + +static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nft_object_hash_key *k = arg->key; + const struct nft_object *obj = ptr; + + if (obj->key.table != k->table) + return -1; + + return strcmp(obj->key.name, k->name); +} + static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -1070,7 +1113,7 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask) return ERR_PTR(-ENOENT); } -static bool lockdep_commit_lock_is_held(struct net *net) +static bool lockdep_commit_lock_is_held(const struct net *net) { #ifdef CONFIG_PROVE_LOCKING return lockdep_is_held(&net->nft.commit_mutex); @@ -2304,7 +2347,6 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, struct net *net = sock_net(skb->sk); unsigned int s_idx = cb->args[0]; const struct nft_rule *rule; - int rc = 1; list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_is_active(net, rule)) @@ -2321,16 +2363,13 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, NLM_F_MULTI | NLM_F_APPEND, table->family, table, chain, rule) < 0) - goto out_unfinished; + return 1; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: (*idx)++; } - rc = 0; -out_unfinished: - cb->args[0] = *idx; - return rc; + return 0; } static int nf_tables_dump_rules(struct sk_buff *skb, @@ -2354,7 +2393,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) continue; - if (ctx && ctx->chain) { + if (ctx && ctx->table && ctx->chain) { struct rhlist_head *list, *tmp; list = rhltable_lookup(&table->chains_ht, ctx->chain, @@ -2382,6 +2421,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, } done: rcu_read_unlock(); + + cb->args[0] = idx; return skb->len; } @@ -2567,6 +2608,9 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) return 0; } +static struct nft_rule *nft_rule_lookup_byid(const struct net *net, + const struct nlattr *nla); + #define NFT_RULE_MAXEXPRS 128 static int nf_tables_newrule(struct net *net, struct sock *nlsk, @@ -2636,6 +2680,12 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]); return PTR_ERR(old_rule); } + } else if (nla[NFTA_RULE_POSITION_ID]) { + old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]); + if (IS_ERR(old_rule)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]); + return PTR_ERR(old_rule); + } } } @@ -3853,7 +3903,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && nla_put_string(skb, NFTA_SET_ELEM_OBJREF, - (*nft_set_ext_obj(ext))->name) < 0) + (*nft_set_ext_obj(ext))->key.name) < 0) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && @@ -4386,7 +4436,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = -EINVAL; goto err2; } - obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF], + obj = nft_obj_lookup(ctx->net, ctx->table, + nla[NFTA_SET_ELEM_OBJREF], set->objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@ -4508,6 +4559,8 @@ err6: err5: kfree(trans); err4: + if (obj) + obj->use--; kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) @@ -4819,18 +4872,36 @@ void nft_unregister_obj(struct nft_object_type *obj_type) } EXPORT_SYMBOL_GPL(nft_unregister_obj); -struct nft_object *nft_obj_lookup(const struct nft_table *table, +struct nft_object *nft_obj_lookup(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask) { + struct nft_object_hash_key k = { .table = table }; + char search[NFT_OBJ_MAXNAMELEN]; + struct rhlist_head *tmp, *list; struct nft_object *obj; - list_for_each_entry_rcu(obj, &table->objects, list) { - if (!nla_strcmp(nla, obj->name) && - objtype == obj->ops->type->type && - nft_active_genmask(obj, genmask)) + nla_strlcpy(search, nla, sizeof(search)); + k.name = search; + + WARN_ON_ONCE(!rcu_read_lock_held() && + !lockdep_commit_lock_is_held(net)); + + rcu_read_lock(); + list = rhltable_lookup(&nft_objname_ht, &k, nft_objname_ht_params); + if (!list) + goto out; + + rhl_for_each_entry_rcu(obj, tmp, list, rhlhead) { + if (objtype == obj->ops->type->type && + nft_active_genmask(obj, genmask)) { + rcu_read_unlock(); return obj; + } } +out: + rcu_read_unlock(); return ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(nft_obj_lookup); @@ -4988,7 +5059,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); if (err != -ENOENT) { @@ -5014,11 +5085,11 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, err = PTR_ERR(obj); goto err1; } - obj->table = table; + obj->key.table = table; obj->handle = nf_tables_alloc_handle(table); - obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); - if (!obj->name) { + obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); + if (!obj->key.name) { err = -ENOMEM; goto err2; } @@ -5027,11 +5098,20 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, if (err < 0) goto err3; + err = rhltable_insert(&nft_objname_ht, &obj->rhlhead, + nft_objname_ht_params); + if (err < 0) + goto err4; + list_add_tail_rcu(&obj->list, &table->objects); table->use++; return 0; +err4: + /* queued in transaction log */ + INIT_LIST_HEAD(&obj->list); + return err; err3: - kfree(obj->name); + kfree(obj->key.name); err2: if (obj->ops->destroy) obj->ops->destroy(&ctx, obj); @@ -5060,7 +5140,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, nfmsg->res_id = htons(net->nft.base_seq & 0xffff); if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || - nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || + nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) || nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) || @@ -5215,7 +5295,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); return PTR_ERR(obj); @@ -5246,7 +5326,7 @@ static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) obj->ops->destroy(ctx, obj); module_put(obj->ops->type->owner); - kfree(obj->name); + kfree(obj->key.name); kfree(obj); } @@ -5280,7 +5360,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask); } else { attr = nla[NFTA_OBJ_NAME]; - obj = nft_obj_lookup(table, attr, objtype, genmask); + obj = nft_obj_lookup(net, table, attr, objtype, genmask); } if (IS_ERR(obj)) { @@ -5297,7 +5377,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, return nft_delobj(&ctx, obj); } -void nft_obj_notify(struct net *net, struct nft_table *table, +void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, int family, int report, gfp_t gfp) { @@ -6404,6 +6484,12 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain) nf_tables_commit_chain_free_rules_old(g0); } +static void nft_obj_del(struct nft_object *obj) +{ + rhltable_remove(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params); + list_del_rcu(&obj->list); +} + static void nft_chain_del(struct nft_chain *chain) { struct nft_table *table = chain->table; @@ -6580,7 +6666,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELOBJ: - list_del_rcu(&nft_trans_obj(trans)->list); + nft_obj_del(nft_trans_obj(trans)); nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), NFT_MSG_DELOBJ); break; @@ -6716,7 +6802,7 @@ static int __nf_tables_abort(struct net *net) break; case NFT_MSG_NEWOBJ: trans->ctx.table->use--; - list_del_rcu(&nft_trans_obj(trans)->list); + nft_obj_del(nft_trans_obj(trans)); break; case NFT_MSG_DELOBJ: trans->ctx.table->use++; @@ -7330,7 +7416,7 @@ static void __nft_release_tables(struct net *net) nft_set_destroy(set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { - list_del(&obj->list); + nft_obj_del(obj); table->use--; nft_obj_destroy(&ctx, obj); } @@ -7392,12 +7478,18 @@ static int __init nf_tables_module_init(void) if (err < 0) goto err3; - /* must be last */ - err = nfnetlink_subsys_register(&nf_tables_subsys); + err = rhltable_init(&nft_objname_ht, &nft_objname_ht_params); if (err < 0) goto err4; + /* must be last */ + err = nfnetlink_subsys_register(&nf_tables_subsys); + if (err < 0) + goto err5; + return err; +err5: + rhltable_destroy(&nft_objname_ht); err4: unregister_netdevice_notifier(&nf_tables_flowtable_notifier); err3: @@ -7417,6 +7509,7 @@ static void __exit nf_tables_module_exit(void) unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_destroy_work); rcu_barrier(); + rhltable_destroy(&nft_objname_ht); nf_tables_core_module_exit(); } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index a50500232b0a..2a00aef7b6d4 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -124,14 +124,25 @@ static void expr_call_ops_eval(const struct nft_expr *expr, struct nft_regs *regs, struct nft_pktinfo *pkt) { +#ifdef CONFIG_RETPOLINE unsigned long e = (unsigned long)expr->ops->eval; +#define X(e, fun) \ + do { if ((e) == (unsigned long)(fun)) \ + return fun(expr, regs, pkt); } while (0) - if (e == (unsigned long)nft_meta_get_eval) - nft_meta_get_eval(expr, regs, pkt); - else if (e == (unsigned long)nft_lookup_eval) - nft_lookup_eval(expr, regs, pkt); - else - expr->ops->eval(expr, regs, pkt); + X(e, nft_payload_eval); + X(e, nft_cmp_eval); + X(e, nft_meta_get_eval); + X(e, nft_lookup_eval); + X(e, nft_range_eval); + X(e, nft_immediate_eval); + X(e, nft_byteorder_eval); + X(e, nft_dynset_eval); + X(e, nft_rt_get_eval); + X(e, nft_bitwise_eval); +#undef X +#endif /* CONFIG_RETPOLINE */ + expr->ops->eval(expr, regs, pkt); } unsigned int diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 109b0d27345a..c69b11ca5aad 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -122,7 +122,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, return -EBUSY; } - l4proto = nf_ct_l4proto_find_get(l4num); + l4proto = nf_ct_l4proto_find(l4num); /* This protocol is not supportted, skip. */ if (l4proto->l4proto != l4num) { @@ -152,7 +152,6 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, err: kfree(timeout); err_proto_put: - nf_ct_l4proto_put(l4proto); return ret; } @@ -302,7 +301,6 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) if (refcount_dec_if_one(&timeout->refcnt)) { /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); - nf_ct_l4proto_put(timeout->timeout.l4proto); nf_ct_untimeout(net, &timeout->timeout); kfree_rcu(timeout, rcu_head); } else { @@ -359,7 +357,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl, return -EINVAL; l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); - l4proto = nf_ct_l4proto_find_get(l4num); + l4proto = nf_ct_l4proto_find(l4num); /* This protocol is not supported, skip. */ if (l4proto->l4proto != l4num) { @@ -372,10 +370,8 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl, if (ret < 0) goto err; - nf_ct_l4proto_put(l4proto); return 0; err: - nf_ct_l4proto_put(l4proto); return ret; } @@ -442,7 +438,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); - l4proto = nf_ct_l4proto_find_get(l4num); + l4proto = nf_ct_l4proto_find(l4num); err = -EOPNOTSUPP; if (l4proto->l4proto != l4num) @@ -474,12 +470,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, break; case IPPROTO_GRE: #ifdef CONFIG_NF_CT_PROTO_GRE - if (l4proto->net_id) { - struct netns_proto_gre *net_gre; - - net_gre = net_generic(net, *l4proto->net_id); - timeouts = net_gre->gre_timeouts; - } + timeouts = nf_gre_pernet(net)->timeouts; #endif break; case 255: @@ -516,7 +507,6 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, /* this avoids a loop in nfnetlink. */ return ret == -EAGAIN ? -ENOBUFS : ret; err: - nf_ct_l4proto_put(l4proto); return err; } @@ -597,7 +587,6 @@ static void __net_exit cttimeout_net_exit(struct net *net) list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { list_del_rcu(&cur->head); - nf_ct_l4proto_put(cur->timeout.l4proto); if (refcount_dec_and_test(&cur->refcnt)) kfree_rcu(cur, rcu_head); diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 6f41dd74729d..1f1d90c1716b 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -66,6 +66,7 @@ static bool nf_osf_match_one(const struct sk_buff *skb, int ttl_check, struct nf_osf_hdr_ctx *ctx) { + const __u8 *optpinit = ctx->optp; unsigned int check_WSS = 0; int fmatch = FMATCH_WRONG; int foptsize, optnum; @@ -155,6 +156,9 @@ static bool nf_osf_match_one(const struct sk_buff *skb, } } + if (fmatch != FMATCH_OK) + ctx->optp = optpinit; + return fmatch == FMATCH_OK; } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index fff8073e2a56..2c75b9e0474e 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -25,9 +25,8 @@ struct nft_bitwise { struct nft_data xor; }; -static void nft_bitwise_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_bitwise_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_bitwise *priv = nft_expr_priv(expr); const u32 *src = ®s->data[priv->sreg]; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 13d4e421a6b3..19dbc34cc75e 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -26,9 +26,9 @@ struct nft_byteorder { u8 size; }; -static void nft_byteorder_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_byteorder_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_byteorder *priv = nft_expr_priv(expr); u32 *src = ®s->data[priv->sreg]; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 79d48c1d06f4..f9f1fa66a16e 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -24,9 +24,9 @@ struct nft_cmp_expr { enum nft_cmp_ops op:8; }; -static void nft_cmp_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_cmp_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); int d; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7334e0b80a5e..5eb269428832 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -22,11 +22,15 @@ #include #include #include +#include struct nft_xt { struct list_head head; struct nft_expr_ops ops; - unsigned int refcnt; + refcount_t refcnt; + + /* used only when transaction mutex is locked */ + unsigned int listcnt; /* Unlike other expressions, ops doesn't have static storage duration. * nft core assumes they do. We use kfree_rcu so that nft core can @@ -43,10 +47,24 @@ struct nft_xt_match_priv { void *info; }; +struct nft_compat_net { + struct list_head nft_target_list; + struct list_head nft_match_list; +}; + +static unsigned int nft_compat_net_id __read_mostly; +static struct nft_expr_type nft_match_type; +static struct nft_expr_type nft_target_type; + +static struct nft_compat_net *nft_compat_pernet(struct net *net) +{ + return net_generic(net, nft_compat_net_id); +} + static bool nft_xt_put(struct nft_xt *xt) { - if (--xt->refcnt == 0) { - list_del(&xt->head); + if (refcount_dec_and_test(&xt->refcnt)) { + WARN_ON_ONCE(!list_empty(&xt->head)); kfree_rcu(xt, rcu_head); return true; } @@ -273,7 +291,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; nft_xt = container_of(expr->ops, struct nft_xt, ops); - nft_xt->refcnt++; + refcount_inc(&nft_xt->refcnt); return 0; } @@ -486,7 +504,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; nft_xt = container_of(expr->ops, struct nft_xt, ops); - nft_xt->refcnt++; + refcount_inc(&nft_xt->refcnt); return 0; } @@ -540,6 +558,43 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); } +static void nft_compat_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + struct list_head *h) +{ + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); + + if (xt->listcnt == 0) + list_add(&xt->head, h); + + xt->listcnt++; +} + +static void nft_compat_activate_mt(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_compat_net *cn = nft_compat_pernet(ctx->net); + + nft_compat_activate(ctx, expr, &cn->nft_match_list); +} + +static void nft_compat_activate_tg(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_compat_net *cn = nft_compat_pernet(ctx->net); + + nft_compat_activate(ctx, expr, &cn->nft_target_list); +} + +static void nft_compat_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); + + if (--xt->listcnt == 0) + list_del_init(&xt->head); +} + static void nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -734,10 +789,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { .cb = nfnl_nft_compat_cb, }; -static LIST_HEAD(nft_match_list); - -static struct nft_expr_type nft_match_type; - static bool nft_match_cmp(const struct xt_match *match, const char *name, u32 rev, u32 family) { @@ -749,6 +800,7 @@ static const struct nft_expr_ops * nft_match_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { + struct nft_compat_net *cn; struct nft_xt *nft_match; struct xt_match *match; unsigned int matchsize; @@ -765,8 +817,10 @@ nft_match_select_ops(const struct nft_ctx *ctx, rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); family = ctx->family; + cn = nft_compat_pernet(ctx->net); + /* Re-use the existing match if it's already loaded. */ - list_for_each_entry(nft_match, &nft_match_list, head) { + list_for_each_entry(nft_match, &cn->nft_match_list, head) { struct xt_match *match = nft_match->ops.data; if (nft_match_cmp(match, mt_name, rev, family)) @@ -789,11 +843,13 @@ nft_match_select_ops(const struct nft_ctx *ctx, goto err; } - nft_match->refcnt = 0; + refcount_set(&nft_match->refcnt, 0); nft_match->ops.type = &nft_match_type; nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; + nft_match->ops.activate = nft_compat_activate_mt; + nft_match->ops.deactivate = nft_compat_deactivate; nft_match->ops.dump = nft_match_dump; nft_match->ops.validate = nft_match_validate; nft_match->ops.data = match; @@ -810,7 +866,8 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.size = matchsize; - list_add(&nft_match->head, &nft_match_list); + nft_match->listcnt = 1; + list_add(&nft_match->head, &cn->nft_match_list); return &nft_match->ops; err: @@ -826,10 +883,6 @@ static struct nft_expr_type nft_match_type __read_mostly = { .owner = THIS_MODULE, }; -static LIST_HEAD(nft_target_list); - -static struct nft_expr_type nft_target_type; - static bool nft_target_cmp(const struct xt_target *tg, const char *name, u32 rev, u32 family) { @@ -841,6 +894,7 @@ static const struct nft_expr_ops * nft_target_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { + struct nft_compat_net *cn; struct nft_xt *nft_target; struct xt_target *target; char *tg_name; @@ -861,8 +915,9 @@ nft_target_select_ops(const struct nft_ctx *ctx, strcmp(tg_name, "standard") == 0) return ERR_PTR(-EINVAL); + cn = nft_compat_pernet(ctx->net); /* Re-use the existing target if it's already loaded. */ - list_for_each_entry(nft_target, &nft_target_list, head) { + list_for_each_entry(nft_target, &cn->nft_target_list, head) { struct xt_target *target = nft_target->ops.data; if (!target->target) @@ -893,11 +948,13 @@ nft_target_select_ops(const struct nft_ctx *ctx, goto err; } - nft_target->refcnt = 0; + refcount_set(&nft_target->refcnt, 0); nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; nft_target->ops.destroy = nft_target_destroy; + nft_target->ops.activate = nft_compat_activate_tg; + nft_target->ops.deactivate = nft_compat_deactivate; nft_target->ops.dump = nft_target_dump; nft_target->ops.validate = nft_target_validate; nft_target->ops.data = target; @@ -907,7 +964,8 @@ nft_target_select_ops(const struct nft_ctx *ctx, else nft_target->ops.eval = nft_target_eval_xt; - list_add(&nft_target->head, &nft_target_list); + nft_target->listcnt = 1; + list_add(&nft_target->head, &cn->nft_target_list); return &nft_target->ops; err: @@ -923,13 +981,74 @@ static struct nft_expr_type nft_target_type __read_mostly = { .owner = THIS_MODULE, }; +static int __net_init nft_compat_init_net(struct net *net) +{ + struct nft_compat_net *cn = nft_compat_pernet(net); + + INIT_LIST_HEAD(&cn->nft_target_list); + INIT_LIST_HEAD(&cn->nft_match_list); + + return 0; +} + +static void __net_exit nft_compat_exit_net(struct net *net) +{ + struct nft_compat_net *cn = nft_compat_pernet(net); + struct nft_xt *xt, *next; + + if (list_empty(&cn->nft_match_list) && + list_empty(&cn->nft_target_list)) + return; + + /* If there was an error that caused nft_xt expr to not be initialized + * fully and noone else requested the same expression later, the lists + * contain 0-refcount entries that still hold module reference. + * + * Clean them here. + */ + mutex_lock(&net->nft.commit_mutex); + list_for_each_entry_safe(xt, next, &cn->nft_target_list, head) { + struct xt_target *target = xt->ops.data; + + list_del_init(&xt->head); + + if (refcount_read(&xt->refcnt)) + continue; + module_put(target->me); + kfree(xt); + } + + list_for_each_entry_safe(xt, next, &cn->nft_match_list, head) { + struct xt_match *match = xt->ops.data; + + list_del_init(&xt->head); + + if (refcount_read(&xt->refcnt)) + continue; + module_put(match->me); + kfree(xt); + } + mutex_unlock(&net->nft.commit_mutex); +} + +static struct pernet_operations nft_compat_net_ops = { + .init = nft_compat_init_net, + .exit = nft_compat_exit_net, + .id = &nft_compat_net_id, + .size = sizeof(struct nft_compat_net), +}; + static int __init nft_compat_module_init(void) { int ret; + ret = register_pernet_subsys(&nft_compat_net_ops); + if (ret < 0) + goto err_target; + ret = nft_register_expr(&nft_match_type); if (ret < 0) - return ret; + goto err_pernet; ret = nft_register_expr(&nft_target_type); if (ret < 0) @@ -942,45 +1061,21 @@ static int __init nft_compat_module_init(void) } return ret; - err_target: nft_unregister_expr(&nft_target_type); err_match: nft_unregister_expr(&nft_match_type); +err_pernet: + unregister_pernet_subsys(&nft_compat_net_ops); return ret; } static void __exit nft_compat_module_exit(void) { - struct nft_xt *xt, *next; - - /* list should be empty here, it can be non-empty only in case there - * was an error that caused nft_xt expr to not be initialized fully - * and noone else requested the same expression later. - * - * In this case, the lists contain 0-refcount entries that still - * hold module reference. - */ - list_for_each_entry_safe(xt, next, &nft_target_list, head) { - struct xt_target *target = xt->ops.data; - - if (WARN_ON_ONCE(xt->refcnt)) - continue; - module_put(target->me); - kfree(xt); - } - - list_for_each_entry_safe(xt, next, &nft_match_list, head) { - struct xt_match *match = xt->ops.data; - - if (WARN_ON_ONCE(xt->refcnt)) - continue; - module_put(match->me); - kfree(xt); - } nfnetlink_subsys_unregister(&nfnl_compat_subsys); nft_unregister_expr(&nft_target_type); nft_unregister_expr(&nft_match_type); + unregister_pernet_subsys(&nft_compat_net_ops); } MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT); diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index a61d7edfc290..1a6b06ce6b5b 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -104,7 +104,7 @@ static void nft_counter_obj_destroy(const struct nft_ctx *ctx, nft_counter_do_destroy(priv); } -static void nft_counter_reset(struct nft_counter_percpu_priv __percpu *priv, +static void nft_counter_reset(struct nft_counter_percpu_priv *priv, struct nft_counter *total) { struct nft_counter *this_cpu; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 586627c361df..7b717fad6cdc 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -870,7 +870,7 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx, l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]); priv->l4proto = l4num; - l4proto = nf_ct_l4proto_find_get(l4num); + l4proto = nf_ct_l4proto_find(l4num); if (l4proto->l4proto != l4num) { ret = -EOPNOTSUPP; @@ -902,7 +902,6 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx, err_free_timeout: kfree(timeout); err_proto_put: - nf_ct_l4proto_put(l4proto); return ret; } @@ -913,7 +912,6 @@ static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx, struct nf_ct_timeout *timeout = priv->timeout; nf_ct_untimeout(ctx->net, timeout); - nf_ct_l4proto_put(timeout->l4proto); nf_ct_netns_put(ctx->net, ctx->family); kfree(priv->timeout); } diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 07d4efd3d851..9658493d37d4 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -62,9 +62,8 @@ err1: return NULL; } -static void nft_dynset_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_dynset_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set *set = priv->set; diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 974525eb92df..6e6b9adf7d38 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -12,6 +12,7 @@ #include #include #include +#include struct nft_flow_offload { struct nft_flowtable *flowtable; @@ -29,10 +30,12 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, memset(&fl, 0, sizeof(fl)); switch (nft_pf(pkt)) { case NFPROTO_IPV4: - fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip; + fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; + fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; break; case NFPROTO_IPV6: - fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6; + fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; + fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex; break; } @@ -41,9 +44,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, return -ENOENT; route->tuple[dir].dst = this_dst; - route->tuple[dir].ifindex = nft_in(pkt)->ifindex; route->tuple[!dir].dst = other_dst; - route->tuple[!dir].ifindex = nft_out(pkt)->ifindex; return 0; } @@ -66,6 +67,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, { struct nft_flow_offload *priv = nft_expr_priv(expr); struct nf_flowtable *flowtable = &priv->flowtable->data; + const struct nf_conn_help *help; enum ip_conntrack_info ctinfo; struct nf_flow_route route; struct flow_offload *flow; @@ -88,7 +90,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, goto out; } - if (test_bit(IPS_HELPER_BIT, &ct->status)) + help = nfct_help(ct); + if (help) goto out; if (ctinfo == IP_CT_NEW || diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index c2d237144f74..ea658e6c53e3 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -25,7 +25,6 @@ struct nft_jhash { u32 modulus; u32 seed; u32 offset; - struct nft_set *map; }; static void nft_jhash_eval(const struct nft_expr *expr, @@ -42,33 +41,10 @@ static void nft_jhash_eval(const struct nft_expr *expr, regs->data[priv->dreg] = h + priv->offset; } -static void nft_jhash_map_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_jhash *priv = nft_expr_priv(expr); - const void *data = ®s->data[priv->sreg]; - const struct nft_set *map = priv->map; - const struct nft_set_ext *ext; - u32 result; - bool found; - - result = reciprocal_scale(jhash(data, priv->len, priv->seed), - priv->modulus) + priv->offset; - - found = map->ops->lookup(nft_net(pkt), map, &result, &ext); - if (!found) - return; - - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), map->dlen); -} - struct nft_symhash { enum nft_registers dreg:8; u32 modulus; u32 offset; - struct nft_set *map; }; static void nft_symhash_eval(const struct nft_expr *expr, @@ -84,28 +60,6 @@ static void nft_symhash_eval(const struct nft_expr *expr, regs->data[priv->dreg] = h + priv->offset; } -static void nft_symhash_map_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) -{ - struct nft_symhash *priv = nft_expr_priv(expr); - struct sk_buff *skb = pkt->skb; - const struct nft_set *map = priv->map; - const struct nft_set_ext *ext; - u32 result; - bool found; - - result = reciprocal_scale(__skb_get_hash_symmetric(skb), - priv->modulus) + priv->offset; - - found = map->ops->lookup(nft_net(pkt), map, &result, &ext); - if (!found) - return; - - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), map->dlen); -} - static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_SREG] = { .type = NLA_U32 }, [NFTA_HASH_DREG] = { .type = NLA_U32 }, @@ -114,9 +68,6 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_SEED] = { .type = NLA_U32 }, [NFTA_HASH_OFFSET] = { .type = NLA_U32 }, [NFTA_HASH_TYPE] = { .type = NLA_U32 }, - [NFTA_HASH_SET_NAME] = { .type = NLA_STRING, - .len = NFT_SET_MAXNAMELEN - 1 }, - [NFTA_HASH_SET_ID] = { .type = NLA_U32 }, }; static int nft_jhash_init(const struct nft_ctx *ctx, @@ -166,20 +117,6 @@ static int nft_jhash_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_jhash_map_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_jhash *priv = nft_expr_priv(expr); - u8 genmask = nft_genmask_next(ctx->net); - - nft_jhash_init(ctx, expr, tb); - priv->map = nft_set_lookup_global(ctx->net, ctx->table, - tb[NFTA_HASH_SET_NAME], - tb[NFTA_HASH_SET_ID], genmask); - return PTR_ERR_OR_ZERO(priv->map); -} - static int nft_symhash_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -206,20 +143,6 @@ static int nft_symhash_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, sizeof(u32)); } -static int nft_symhash_map_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_jhash *priv = nft_expr_priv(expr); - u8 genmask = nft_genmask_next(ctx->net); - - nft_symhash_init(ctx, expr, tb); - priv->map = nft_set_lookup_global(ctx->net, ctx->table, - tb[NFTA_HASH_SET_NAME], - tb[NFTA_HASH_SET_ID], genmask); - return PTR_ERR_OR_ZERO(priv->map); -} - static int nft_jhash_dump(struct sk_buff *skb, const struct nft_expr *expr) { @@ -247,18 +170,6 @@ nla_put_failure: return -1; } -static int nft_jhash_map_dump(struct sk_buff *skb, - const struct nft_expr *expr) -{ - const struct nft_jhash *priv = nft_expr_priv(expr); - - if (nft_jhash_dump(skb, expr) || - nla_put_string(skb, NFTA_HASH_SET_NAME, priv->map->name)) - return -1; - - return 0; -} - static int nft_symhash_dump(struct sk_buff *skb, const struct nft_expr *expr) { @@ -279,18 +190,6 @@ nla_put_failure: return -1; } -static int nft_symhash_map_dump(struct sk_buff *skb, - const struct nft_expr *expr) -{ - const struct nft_symhash *priv = nft_expr_priv(expr); - - if (nft_symhash_dump(skb, expr) || - nla_put_string(skb, NFTA_HASH_SET_NAME, priv->map->name)) - return -1; - - return 0; -} - static struct nft_expr_type nft_hash_type; static const struct nft_expr_ops nft_jhash_ops = { .type = &nft_hash_type, @@ -300,14 +199,6 @@ static const struct nft_expr_ops nft_jhash_ops = { .dump = nft_jhash_dump, }; -static const struct nft_expr_ops nft_jhash_map_ops = { - .type = &nft_hash_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_jhash)), - .eval = nft_jhash_map_eval, - .init = nft_jhash_map_init, - .dump = nft_jhash_map_dump, -}; - static const struct nft_expr_ops nft_symhash_ops = { .type = &nft_hash_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)), @@ -316,14 +207,6 @@ static const struct nft_expr_ops nft_symhash_ops = { .dump = nft_symhash_dump, }; -static const struct nft_expr_ops nft_symhash_map_ops = { - .type = &nft_hash_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)), - .eval = nft_symhash_map_eval, - .init = nft_symhash_map_init, - .dump = nft_symhash_map_dump, -}; - static const struct nft_expr_ops * nft_hash_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -336,12 +219,8 @@ nft_hash_select_ops(const struct nft_ctx *ctx, type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE])); switch (type) { case NFT_HASH_SYM: - if (tb[NFTA_HASH_SET_NAME]) - return &nft_symhash_map_ops; return &nft_symhash_ops; case NFT_HASH_JENKINS: - if (tb[NFTA_HASH_SET_NAME]) - return &nft_jhash_map_ops; return &nft_jhash_ops; default: break; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 0777a93211e2..3e5ed787b1d4 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -17,9 +17,9 @@ #include #include -static void nft_immediate_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_immediate_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 6df486c5ebd3..987d2d6ce624 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -244,6 +244,16 @@ void nft_meta_get_eval(const struct nft_expr *expr, strncpy((char *)dest, p->br->dev->name, IFNAMSIZ); return; #endif + case NFT_META_IIFKIND: + if (in == NULL || in->rtnl_link_ops == NULL) + goto err; + strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ); + break; + case NFT_META_OIFKIND: + if (out == NULL || out->rtnl_link_ops == NULL) + goto err; + strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ); + break; default: WARN_ON(1); goto err; @@ -340,6 +350,8 @@ static int nft_meta_get_init(const struct nft_ctx *ctx, break; case NFT_META_IIFNAME: case NFT_META_OIFNAME: + case NFT_META_IIFKIND: + case NFT_META_OIFKIND: len = IFNAMSIZ; break; case NFT_META_PRANDOM: diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index a3185ca2a3a9..c1f2adf198a0 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -38,7 +38,8 @@ static int nft_objref_init(const struct nft_ctx *ctx, return -EINVAL; objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE])); - obj = nft_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype, + obj = nft_obj_lookup(ctx->net, ctx->table, + tb[NFTA_OBJREF_IMM_NAME], objtype, genmask); if (IS_ERR(obj)) return -ENOENT; @@ -53,7 +54,7 @@ static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_object *obj = nft_objref_priv(expr); - if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) || + if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->key.name) || nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->ops->type->type))) goto nla_put_failure; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index e110b0ebbf58..54e15de4b79a 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -70,9 +70,9 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; } -static void nft_payload_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_payload_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_payload *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 0ed124a93fcf..354cde67bca9 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -61,7 +61,7 @@ static void nft_quota_obj_eval(struct nft_object *obj, if (overquota && !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) - nft_obj_notify(nft_net(pkt), obj->table, obj, 0, 0, + nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0, NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC); } diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index cedb96c3619f..529ac8acb19d 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -23,9 +23,8 @@ struct nft_range_expr { enum nft_range_ops op:8; }; -static void nft_range_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_range_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_range_expr *priv = nft_expr_priv(expr); int d1, d2; diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index f35fa33913ae..c48daed5c46b 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -53,9 +53,9 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb return mtu - minlen; } -static void nft_rt_get_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_rt_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_rt *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index e8da9a9bba73..55af9f247993 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -180,6 +180,25 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, } EXPORT_SYMBOL_GPL(nf_route); +static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) +{ +#ifdef CONFIG_INET + const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); + + if (entry->state.hook == NF_INET_LOCAL_OUT) { + const struct iphdr *iph = ip_hdr(skb); + + if (!(iph->tos == rt_info->tos && + skb->mark == rt_info->mark && + iph->daddr == rt_info->daddr && + iph->saddr == rt_info->saddr)) + return ip_route_me_harder(entry->state.net, skb, + RTN_UNSPEC); + } +#endif + return 0; +} + int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) { const struct nf_ipv6_ops *v6ops; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 2c7a4b80206f..0fa863f57575 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -159,7 +159,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, /* Make sure the timeout policy matches any existing protocol tracker, * otherwise default to generic. */ - l4proto = __nf_ct_l4proto_find(proto); + l4proto = nf_ct_l4proto_find(proto); if (timeout->l4proto->l4proto != l4proto->l4proto) { ret = -EINVAL; pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n", diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 4034d70bff39..b2e39cb6a590 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -96,8 +96,7 @@ match_outdev: static int physdev_mt_check(const struct xt_mtchk_param *par) { const struct xt_physdev_info *info = par->matchinfo; - - br_netfilter_enable(); + static bool brnf_probed __read_mostly; if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) @@ -111,6 +110,12 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return -EINVAL; } + + if (!brnf_probed) { + brnf_probed = true; + request_module("br_netfilter"); + } + return 0; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 3c023d6120f6..8fa35df94c07 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1371,6 +1371,14 @@ int netlink_has_listeners(struct sock *sk, unsigned int group) } EXPORT_SYMBOL_GPL(netlink_has_listeners); +bool netlink_strict_get_check(struct sk_buff *skb) +{ + const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk); + + return nlk->flags & NETLINK_F_STRICT_CHK; +} +EXPORT_SYMBOL_GPL(netlink_strict_get_check); + static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) { struct netlink_sock *nlk = nlk_sk(sk); diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index cbd51ed5a2d7..908e53ab47a4 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -52,21 +52,21 @@ void nr_start_t1timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - mod_timer(&nr->t1timer, jiffies + nr->t1); + sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1); } void nr_start_t2timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - mod_timer(&nr->t2timer, jiffies + nr->t2); + sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2); } void nr_start_t4timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - mod_timer(&nr->t4timer, jiffies + nr->t4); + sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4); } void nr_start_idletimer(struct sock *sk) @@ -74,37 +74,37 @@ void nr_start_idletimer(struct sock *sk) struct nr_sock *nr = nr_sk(sk); if (nr->idle > 0) - mod_timer(&nr->idletimer, jiffies + nr->idle); + sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle); } void nr_start_heartbeat(struct sock *sk) { - mod_timer(&sk->sk_timer, jiffies + 5 * HZ); + sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ); } void nr_stop_t1timer(struct sock *sk) { - del_timer(&nr_sk(sk)->t1timer); + sk_stop_timer(sk, &nr_sk(sk)->t1timer); } void nr_stop_t2timer(struct sock *sk) { - del_timer(&nr_sk(sk)->t2timer); + sk_stop_timer(sk, &nr_sk(sk)->t2timer); } void nr_stop_t4timer(struct sock *sk) { - del_timer(&nr_sk(sk)->t4timer); + sk_stop_timer(sk, &nr_sk(sk)->t4timer); } void nr_stop_idletimer(struct sock *sk) { - del_timer(&nr_sk(sk)->idletimer); + sk_stop_timer(sk, &nr_sk(sk)->idletimer); } void nr_stop_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } int nr_t1timer_running(struct sock *sk) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index cd94f925495a..35884f836260 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -622,7 +622,7 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, if (natted) { struct nf_conntrack_tuple inverse; - if (!nf_ct_invert_tuplepr(&inverse, &tuple)) { + if (!nf_ct_invert_tuple(&inverse, &tuple)) { pr_debug("ovs_ct_find_existing: Inversion failed!\n"); return NULL; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 435a4bdf8f89..691da853bef5 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -500,7 +500,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, return -EINVAL; } - if (!nz || !is_all_zero(nla_data(nla), expected_len)) { + if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) { attrs |= 1 << type; a[type] = nla; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d0945253f43b..3b1a78906bc0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2887,7 +2887,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } else if (reserve) { skb_reserve(skb, -reserve); - if (len < reserve) + if (len < reserve + sizeof(struct ipv6hdr) && + dev->min_header_len != dev->hard_header_len) skb_reset_network_header(skb); } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 77e9f85a2c92..f2ff21d7df08 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -850,6 +850,7 @@ void rose_link_device_down(struct net_device *dev) /* * Route a frame to an appropriate AX.25 connection. + * A NULL ax25_cb indicates an internally generated frame. */ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) { @@ -867,6 +868,10 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) if (skb->len < ROSE_MIN_LEN) return res; + + if (!ax25) + return rose_loopback_queue(skb, NULL); + frametype = skb->data[2]; lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); if (frametype == ROSE_CALL_REQUEST && diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index a2522f9d71e2..96f2952bbdfd 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -418,76 +418,6 @@ u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call) } EXPORT_SYMBOL(rxrpc_kernel_get_epoch); -/** - * rxrpc_kernel_check_call - Check a call's state - * @sock: The socket the call is on - * @call: The call to check - * @_compl: Where to store the completion state - * @_abort_code: Where to store any abort code - * - * Allow a kernel service to query the state of a call and find out the manner - * of its termination if it has completed. Returns -EINPROGRESS if the call is - * still going, 0 if the call finished successfully, -ECONNABORTED if the call - * was aborted and an appropriate error if the call failed in some other way. - */ -int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call, - enum rxrpc_call_completion *_compl, u32 *_abort_code) -{ - if (call->state != RXRPC_CALL_COMPLETE) - return -EINPROGRESS; - smp_rmb(); - *_compl = call->completion; - *_abort_code = call->abort_code; - return call->error; -} -EXPORT_SYMBOL(rxrpc_kernel_check_call); - -/** - * rxrpc_kernel_retry_call - Allow a kernel service to retry a call - * @sock: The socket the call is on - * @call: The call to retry - * @srx: The address of the peer to contact - * @key: The security context to use (defaults to socket setting) - * - * Allow a kernel service to try resending a client call that failed due to a - * network error to a new address. The Tx queue is maintained intact, thereby - * relieving the need to re-encrypt any request data that has already been - * buffered. - */ -int rxrpc_kernel_retry_call(struct socket *sock, struct rxrpc_call *call, - struct sockaddr_rxrpc *srx, struct key *key) -{ - struct rxrpc_conn_parameters cp; - struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - int ret; - - _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); - - if (!key) - key = rx->key; - if (key && !key->payload.data[0]) - key = NULL; /* a no-security key */ - - memset(&cp, 0, sizeof(cp)); - cp.local = rx->local; - cp.key = key; - cp.security_level = 0; - cp.exclusive = false; - cp.service_id = srx->srx_service; - - mutex_lock(&call->user_mutex); - - ret = rxrpc_prepare_call_for_retry(rx, call); - if (ret == 0) - ret = rxrpc_retry_client_call(rx, call, &cp, srx, GFP_KERNEL); - - mutex_unlock(&call->user_mutex); - rxrpc_put_peer(cp.peer); - _leave(" = %d", ret); - return ret; -} -EXPORT_SYMBOL(rxrpc_kernel_retry_call); - /** * rxrpc_kernel_new_call_notification - Get notifications of new calls * @sock: The socket to intercept received messages on diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index bc628acf4f4f..4b1a534d290a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -476,7 +476,6 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ - RXRPC_CALL_TX_LASTQ, /* Last packet has been queued */ RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ RXRPC_CALL_PINGING, /* Ping in process */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ @@ -517,6 +516,18 @@ enum rxrpc_call_state { NR__RXRPC_CALL_STATES }; +/* + * Call completion condition (state == RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ + RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ + RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ + RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ + NR__RXRPC_CALL_COMPLETIONS +}; + /* * Call Tx congestion management modes. */ @@ -761,15 +772,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct sockaddr_rxrpc *, struct rxrpc_call_params *, gfp_t, unsigned int); -int rxrpc_retry_client_call(struct rxrpc_sock *, - struct rxrpc_call *, - struct rxrpc_conn_parameters *, - struct sockaddr_rxrpc *, - gfp_t); void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); -int rxrpc_prepare_call_for_retry(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); bool __rxrpc_queue_call(struct rxrpc_call *); bool rxrpc_queue_call(struct rxrpc_call *); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 8f1a8f85b1f9..8aa2937b069f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -324,48 +324,6 @@ error: return ERR_PTR(ret); } -/* - * Retry a call to a new address. It is expected that the Tx queue of the call - * will contain data previously packaged for an old call. - */ -int rxrpc_retry_client_call(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) -{ - const void *here = __builtin_return_address(0); - int ret; - - /* Set up or get a connection record and set the protocol parameters, - * including channel number and call ID. - */ - ret = rxrpc_connect_call(rx, call, cp, srx, gfp); - if (ret < 0) - goto error; - - trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), - here, NULL); - - rxrpc_start_call_timer(call); - - _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); - - if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_queue_call(call); - - _leave(" = 0"); - return 0; - -error: - rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); - trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), - here, ERR_PTR(ret)); - _leave(" = %d", ret); - return ret; -} - /* * Set up an incoming call. call->conn points to the connection. * This is called in BH context and isn't allowed to fail. @@ -533,61 +491,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _leave(""); } -/* - * Prepare a kernel service call for retry. - */ -int rxrpc_prepare_call_for_retry(struct rxrpc_sock *rx, struct rxrpc_call *call) -{ - const void *here = __builtin_return_address(0); - int i; - u8 last = 0; - - _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); - - trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage), - here, (const void *)call->flags); - - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - ASSERTCMP(call->completion, !=, RXRPC_CALL_REMOTELY_ABORTED); - ASSERTCMP(call->completion, !=, RXRPC_CALL_LOCALLY_ABORTED); - ASSERT(list_empty(&call->recvmsg_link)); - - del_timer_sync(&call->timer); - - _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, call->conn); - - if (call->conn) - rxrpc_disconnect_call(call); - - if (rxrpc_is_service_call(call) || - !call->tx_phase || - call->tx_hard_ack != 0 || - call->rx_hard_ack != 0 || - call->rx_top != 0) - return -EINVAL; - - call->state = RXRPC_CALL_UNINITIALISED; - call->completion = RXRPC_CALL_SUCCEEDED; - call->call_id = 0; - call->cid = 0; - call->cong_cwnd = 0; - call->cong_extra = 0; - call->cong_ssthresh = 0; - call->cong_mode = 0; - call->cong_dup_acks = 0; - call->cong_cumul_acks = 0; - call->acks_lowest_nak = 0; - - for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { - last |= call->rxtx_annotations[i]; - call->rxtx_annotations[i] &= RXRPC_TX_ANNO_LAST; - call->rxtx_annotations[i] |= RXRPC_TX_ANNO_RETRANS; - } - - _leave(" = 0"); - return 0; -} - /* * release all the calls associated with a socket */ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 521189f4b666..b2adfa825363 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -562,10 +562,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); write_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags)) - call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; - else - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; write_unlock_bh(&call->state_lock); rxrpc_see_call(call); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index be01f9c5d963..46c9312085b1 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -169,10 +169,8 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, ASSERTCMP(seq, ==, call->tx_top + 1); - if (last) { + if (last) annotation |= RXRPC_TX_ANNO_LAST; - set_bit(RXRPC_CALL_TX_LASTQ, &call->flags); - } /* We have to set the timestamp before queueing as the retransmit * algorithm can see the packet as soon as we queue it. @@ -386,6 +384,11 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, call->tx_total_len -= copy; } + /* check for the far side aborting the call or a network error + * occurring */ + if (call->state == RXRPC_CALL_COMPLETE) + goto call_terminated; + /* add the packet to the send queue if it's now full */ if (sp->remain <= 0 || (msg_data_left(msg) == 0 && !more)) { @@ -425,16 +428,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, notify_end_tx); skb = NULL; } - - /* Check for the far side aborting the call or a network error - * occurring. If this happens, save any packet that was under - * construction so that in the case of a network error, the - * call can be retried or redirected. - */ - if (call->state == RXRPC_CALL_COMPLETE) { - ret = call->error; - goto out; - } } while (msg_data_left(msg) > 0); success: @@ -444,6 +437,11 @@ out: _leave(" = %d", ret); return ret; +call_terminated: + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + _leave(" = %d", call->error); + return call->error; + maybe_error: if (copied) goto success; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index c3b90fadaff6..8b43fe0130f7 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -197,6 +197,15 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_TTL] = { .type = NLA_U8 }, }; +static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) +{ + if (!p) + return; + if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(&p->tcft_enc_metadata->dst); + kfree_rcu(p, rcu); +} + static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, @@ -360,8 +369,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, rcu_swap_protected(t->params, params_new, lockdep_is_held(&t->tcf_lock)); spin_unlock_bh(&t->tcf_lock); - if (params_new) - kfree_rcu(params_new, rcu); + tunnel_key_release_params(params_new); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -385,12 +393,7 @@ static void tunnel_key_release(struct tc_action *a) struct tcf_tunnel_key_params *params; params = rcu_dereference_protected(t->params, 1); - if (params) { - if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) - dst_release(¶ms->tcft_enc_metadata->dst); - - kfree_rcu(params, rcu); - } + tunnel_key_release_params(params); } static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8ce2a0507970..e2b5cb2eb34e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1277,7 +1277,6 @@ EXPORT_SYMBOL(tcf_block_cb_unregister); int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { - __be16 protocol = tc_skb_protocol(skb); #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 4; const struct tcf_proto *orig_tp = tp; @@ -1287,6 +1286,7 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { + __be16 protocol = tc_skb_protocol(skb); int err; if (tp->protocol != protocol && @@ -1319,7 +1319,6 @@ reset: } tp = first_tp; - protocol = tc_skb_protocol(skb); goto reclassify; #endif } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 6a5dce8baf19..4a57fec6f306 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,7 @@ struct basic_filter { struct tcf_result res; struct tcf_proto *tp; struct list_head link; + struct tc_basic_pcnt __percpu *pf; struct rcu_work rwork; }; @@ -46,8 +48,10 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct basic_filter *f; list_for_each_entry_rcu(f, &head->flist, link) { + __this_cpu_inc(f->pf->rcnt); if (!tcf_em_tree_match(skb, &f->ematches, NULL)) continue; + __this_cpu_inc(f->pf->rhit); *res = f->res; r = tcf_exts_exec(skb, &f->exts, res); if (r < 0) @@ -89,6 +93,7 @@ static void __basic_delete_filter(struct basic_filter *f) tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); tcf_exts_put_net(&f->exts); + free_percpu(f->pf); kfree(f); } @@ -208,6 +213,11 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (err) goto errout; fnew->handle = handle; + fnew->pf = alloc_percpu(struct tc_basic_pcnt); + if (!fnew->pf) { + err = -ENOMEM; + goto errout; + } err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr, extack); @@ -231,6 +241,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + free_percpu(fnew->pf); tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; @@ -265,8 +276,10 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl) static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { + struct tc_basic_pcnt gpf = {}; struct basic_filter *f = fh; struct nlattr *nest; + int cpu; if (f == NULL) return skb->len; @@ -281,6 +294,18 @@ static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid)) goto nla_put_failure; + for_each_possible_cpu(cpu) { + struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu); + + gpf.rcnt += pf->rcnt; + gpf.rhit += pf->rhit; + } + + if (nla_put_64bit(skb, TCA_BASIC_PCNT, + sizeof(struct tc_basic_pcnt), + &gpf, TCA_BASIC_PAD)) + goto nla_put_failure; + if (tcf_exts_dump(skb, &f->exts) < 0 || tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) goto nla_put_failure; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index dad04e710493..f6aa57fbbbaf 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1290,17 +1290,23 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; + struct fl_flow_mask *mask; struct nlattr **tb; - struct fl_flow_mask mask = {}; int err; if (!tca[TCA_OPTIONS]) return -EINVAL; - tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); - if (!tb) + mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL); + if (!mask) return -ENOBUFS; + tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); + if (!tb) { + err = -ENOBUFS; + goto errout_mask_alloc; + } + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy, NULL); if (err < 0) @@ -1343,12 +1349,12 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } } - err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr, + err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, tp->chain->tmplt_priv, extack); if (err) goto errout_idr; - err = fl_check_assign_mask(head, fnew, fold, &mask); + err = fl_check_assign_mask(head, fnew, fold, mask); if (err) goto errout_idr; @@ -1392,6 +1398,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } kfree(tb); + kfree(mask); return 0; errout_mask: @@ -1405,6 +1412,8 @@ errout: kfree(fnew); errout_tb: kfree(tb); +errout_mask_alloc: + kfree(mask); return err; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7e4d1ccf4c87..03e26e8d0ec9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -758,8 +758,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return 0; } -void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, - unsigned int len) +void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) { bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index b910cd5c56f7..73940293700d 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1667,7 +1667,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) { struct sk_buff *segs, *nskb; netdev_features_t features = netif_skb_features(skb); - unsigned int slen = 0; + unsigned int slen = 0, numsegs = 0; segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) @@ -1683,6 +1683,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, flow_queue_add(flow, segs); sch->q.qlen++; + numsegs++; slen += segs->len; q->buffer_used += segs->truesize; b->packets++; @@ -1696,7 +1697,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, sch->qstats.backlog += slen; q->avg_window_bytes += slen; - qdisc_tree_reduce_backlog(sch, 1, len); + qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); } else { /* not splitting */ diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index e689e11b6d0f..c6a502933fe7 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -88,13 +88,14 @@ static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct Qdisc *child, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); int err; err = child->ops->enqueue(skb, child, to_free); if (err != NET_XMIT_SUCCESS) return err; - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index cdebaed0f8cf..09b800991065 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -350,9 +350,11 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; int err = 0; + bool first; cl = drr_classify(skb, sch, &err); if (cl == NULL) { @@ -362,6 +364,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } + first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -371,12 +374,12 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (cl->qdisc->q.qlen == 1) { + if (first) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return err; } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index f6f480784bc6..42471464ded3 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -199,6 +199,7 @@ static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); struct dsmark_qdisc_data *p = qdisc_priv(sch); int err; @@ -271,7 +272,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b18ec1f6de60..24cc220a3218 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1539,8 +1539,10 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); struct hfsc_class *cl; int uninitialized_var(err); + bool first; cl = hfsc_classify(skb, sch, &err); if (cl == NULL) { @@ -1550,6 +1552,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } + first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -1559,9 +1562,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } - if (cl->qdisc->q.qlen == 1) { - unsigned int len = qdisc_pkt_len(skb); - + if (first) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); if (cl->cl_flags & HFSC_FSC) @@ -1576,7 +1577,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 58b449490757..30f9da7e1076 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -581,6 +581,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { int uninitialized_var(ret); + unsigned int len = qdisc_pkt_len(skb); struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_classify(skb, sch, &ret); @@ -610,7 +611,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, htb_activate(q, cl); } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index cdf68706e40f..847141cd900f 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -72,6 +72,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) static int prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); struct Qdisc *qdisc; int ret; @@ -88,7 +89,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) ret = qdisc_enqueue(skb, qdisc, to_free); if (ret == NET_XMIT_SUCCESS) { - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index dc37c4ead439..29f5c4a24688 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1210,10 +1210,12 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q) static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb), gso_segs; struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; struct qfq_aggregate *agg; int err = 0; + bool first; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { @@ -1224,17 +1226,18 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); - if (unlikely(cl->agg->lmax < qdisc_pkt_len(skb))) { + if (unlikely(cl->agg->lmax < len)) { pr_debug("qfq: increasing maxpkt from %u to %u for class %u", - cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid); - err = qfq_change_agg(sch, cl, cl->agg->class_weight, - qdisc_pkt_len(skb)); + cl->agg->lmax, len, cl->common.classid); + err = qfq_change_agg(sch, cl, cl->agg->class_weight, len); if (err) { cl->qstats.drops++; return qdisc_drop(skb, sch, to_free); } } + gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; + first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); @@ -1245,16 +1248,17 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - bstats_update(&cl->bstats, skb); - qdisc_qstats_backlog_inc(sch, skb); + cl->bstats.bytes += len; + cl->bstats.packets += gso_segs; + sch->qstats.backlog += len; ++sch->q.qlen; agg = cl->agg; /* if the queue was not empty, then done here */ - if (cl->qdisc->q.qlen != 1) { + if (!first) { if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) && list_first_entry(&agg->active, struct qfq_class, alist) - == cl && cl->deficit < qdisc_pkt_len(skb)) + == cl && cl->deficit < len) list_move_tail(&cl->alist, &agg->active); return err; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 942dcca09cf2..7f272a9070c5 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -185,6 +185,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); + unsigned int len = qdisc_pkt_len(skb); int ret; if (qdisc_pkt_len(skb) > q->max_size) { @@ -200,7 +201,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, return ret; } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 201c888604e4..d2c7d0d2abc1 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -101,7 +101,7 @@ static struct sctp_association *sctp_association_init( * socket values. */ asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt; - asoc->pf_retrans = net->sctp.pf_retrans; + asoc->pf_retrans = sp->pf_retrans; asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial); asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max); @@ -1651,8 +1651,11 @@ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) if (preload) idr_preload(gfp); spin_lock_bh(&sctp_assocs_id_lock); - /* 0 is not a valid assoc_id, must be >= 1 */ - ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, 1, 0, GFP_NOWAIT); + /* 0, 1, 2 are used as SCTP_FUTURE_ASSOC, SCTP_CURRENT_ASSOC and + * SCTP_ALL_ASSOC, so an available id must be > SCTP_ALL_ASSOC. + */ + ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, SCTP_ALL_ASSOC + 1, 0, + GFP_NOWAIT); spin_unlock_bh(&sctp_assocs_id_lock); if (preload) idr_preload_end(); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index b9ed271b7ef7..6200cd2b4b99 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -97,11 +97,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, switch (ev) { case NETDEV_UP: - addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; - addr->a.v6.sin6_port = 0; - addr->a.v6.sin6_flowinfo = 0; addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; @@ -282,7 +280,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (saddr) { fl6->saddr = saddr->v6.sin6_addr; - fl6->fl6_sport = saddr->v6.sin6_port; + if (!fl6->fl6_sport) + fl6->fl6_sport = saddr->v6.sin6_port; pr_debug("src=%pI6 - ", &fl6->saddr); } @@ -434,7 +433,6 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; - addr->a.v6.sin6_port = 0; addr->a.v6.sin6_addr = ifp->addr; addr->a.v6.sin6_scope_id = dev->ifindex; addr->valid = 1; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c37e1c2dec9d..fd33281999b5 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -212,7 +212,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->retransmit); INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); - sctp_sched_set_sched(asoc, SCTP_SS_DEFAULT); + sctp_sched_set_sched(asoc, sctp_sk(asoc->base.sk)->default_ss); } /* Free the outqueue structure and any related pending chunks. diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d5878ae55840..6abc8b274270 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -101,7 +101,6 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; - addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; INIT_LIST_HEAD(&addr->list); @@ -441,7 +440,8 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, } if (saddr) { fl4->saddr = saddr->v4.sin_addr.s_addr; - fl4->fl4_sport = saddr->v4.sin_port; + if (!fl4->fl4_sport) + fl4->fl4_sport = saddr->v4.sin_port; } pr_debug("%s: dst:%pI4, src:%pI4 - ", __func__, &fl4->daddr, @@ -776,10 +776,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, switch (ev) { case NETDEV_UP: - addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; - addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; spin_lock_bh(&net->sctp.local_addr_lock); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index f4ac6c592e13..d05c57664e36 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -495,7 +495,10 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, * * [INIT ACK back to where the INIT came from.] */ - retval->transport = chunk->transport; + if (chunk->transport) + retval->transport = + sctp_assoc_lookup_paddr(asoc, + &chunk->transport->ipaddr); retval->subh.init_hdr = sctp_addto_chunk(retval, sizeof(initack), &initack); @@ -642,8 +645,10 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc, * * [COOKIE ACK back to where the COOKIE ECHO came from.] */ - if (retval && chunk) - retval->transport = chunk->transport; + if (retval && chunk && chunk->transport) + retval->transport = + sctp_assoc_lookup_paddr(asoc, + &chunk->transport->ipaddr); return retval; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f93c3cf9e567..9644bdc8e85c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -248,7 +248,7 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id) } /* Otherwise this is a UDP-style socket. */ - if (!id || (id == (sctp_assoc_t)-1)) + if (id <= SCTP_ALL_ASSOC) return NULL; spin_lock_bh(&sctp_assocs_id_lock); @@ -2750,12 +2750,13 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, return -EINVAL; } - /* Get association, if assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.spp_assoc_id); - if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; /* Heartbeat demand can only be sent on a transport or @@ -2797,6 +2798,43 @@ static inline __u32 sctp_spp_sackdelay_disable(__u32 param_flags) return (param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_DISABLE; } +static void sctp_apply_asoc_delayed_ack(struct sctp_sack_info *params, + struct sctp_association *asoc) +{ + struct sctp_transport *trans; + + if (params->sack_delay) { + asoc->sackdelay = msecs_to_jiffies(params->sack_delay); + asoc->param_flags = + sctp_spp_sackdelay_enable(asoc->param_flags); + } + if (params->sack_freq == 1) { + asoc->param_flags = + sctp_spp_sackdelay_disable(asoc->param_flags); + } else if (params->sack_freq > 1) { + asoc->sackfreq = params->sack_freq; + asoc->param_flags = + sctp_spp_sackdelay_enable(asoc->param_flags); + } + + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) { + if (params->sack_delay) { + trans->sackdelay = msecs_to_jiffies(params->sack_delay); + trans->param_flags = + sctp_spp_sackdelay_enable(trans->param_flags); + } + if (params->sack_freq == 1) { + trans->param_flags = + sctp_spp_sackdelay_disable(trans->param_flags); + } else if (params->sack_freq > 1) { + trans->sackfreq = params->sack_freq; + trans->param_flags = + sctp_spp_sackdelay_enable(trans->param_flags); + } + } +} + /* * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) * @@ -2836,10 +2874,9 @@ static inline __u32 sctp_spp_sackdelay_disable(__u32 param_flags) static int sctp_setsockopt_delayed_ack(struct sock *sk, char __user *optval, unsigned int optlen) { - struct sctp_sack_info params; - struct sctp_transport *trans = NULL; - struct sctp_association *asoc = NULL; - struct sctp_sock *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_sack_info params; if (optlen == sizeof(struct sctp_sack_info)) { if (copy_from_user(¶ms, optval, optlen)) @@ -2867,67 +2904,42 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, if (params.sack_delay > 500) return -EINVAL; - /* Get association, if sack_assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.sack_assoc_id); - if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.sack_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; - if (params.sack_delay) { - if (asoc) { - asoc->sackdelay = - msecs_to_jiffies(params.sack_delay); - asoc->param_flags = - sctp_spp_sackdelay_enable(asoc->param_flags); - } else { + if (asoc) { + sctp_apply_asoc_delayed_ack(¶ms, asoc); + + return 0; + } + + if (params.sack_assoc_id == SCTP_FUTURE_ASSOC || + params.sack_assoc_id == SCTP_ALL_ASSOC) { + if (params.sack_delay) { sp->sackdelay = params.sack_delay; sp->param_flags = sctp_spp_sackdelay_enable(sp->param_flags); } - } - - if (params.sack_freq == 1) { - if (asoc) { - asoc->param_flags = - sctp_spp_sackdelay_disable(asoc->param_flags); - } else { + if (params.sack_freq == 1) { sp->param_flags = sctp_spp_sackdelay_disable(sp->param_flags); - } - } else if (params.sack_freq > 1) { - if (asoc) { - asoc->sackfreq = params.sack_freq; - asoc->param_flags = - sctp_spp_sackdelay_enable(asoc->param_flags); - } else { + } else if (params.sack_freq > 1) { sp->sackfreq = params.sack_freq; sp->param_flags = sctp_spp_sackdelay_enable(sp->param_flags); } } - /* If change is for association, also apply to each transport. */ - if (asoc) { - list_for_each_entry(trans, &asoc->peer.transport_addr_list, - transports) { - if (params.sack_delay) { - trans->sackdelay = - msecs_to_jiffies(params.sack_delay); - trans->param_flags = - sctp_spp_sackdelay_enable(trans->param_flags); - } - if (params.sack_freq == 1) { - trans->param_flags = - sctp_spp_sackdelay_disable(trans->param_flags); - } else if (params.sack_freq > 1) { - trans->sackfreq = params.sack_freq; - trans->param_flags = - sctp_spp_sackdelay_enable(trans->param_flags); - } - } - } + if (params.sack_assoc_id == SCTP_CURRENT_ASSOC || + params.sack_assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &sp->ep->asocs, asocs) + sctp_apply_asoc_delayed_ack(¶ms, asoc); return 0; } @@ -2997,15 +3009,22 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, return -EINVAL; asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); - if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.sinfo_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { asoc->default_stream = info.sinfo_stream; asoc->default_flags = info.sinfo_flags; asoc->default_ppid = info.sinfo_ppid; asoc->default_context = info.sinfo_context; asoc->default_timetolive = info.sinfo_timetolive; - } else { + + return 0; + } + + if (info.sinfo_assoc_id == SCTP_FUTURE_ASSOC || + info.sinfo_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.sinfo_stream; sp->default_flags = info.sinfo_flags; sp->default_ppid = info.sinfo_ppid; @@ -3013,6 +3032,17 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, sp->default_timetolive = info.sinfo_timetolive; } + if (info.sinfo_assoc_id == SCTP_CURRENT_ASSOC || + info.sinfo_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + asoc->default_stream = info.sinfo_stream; + asoc->default_flags = info.sinfo_flags; + asoc->default_ppid = info.sinfo_ppid; + asoc->default_context = info.sinfo_context; + asoc->default_timetolive = info.sinfo_timetolive; + } + } + return 0; } @@ -3037,20 +3067,37 @@ static int sctp_setsockopt_default_sndinfo(struct sock *sk, return -EINVAL; asoc = sctp_id2assoc(sk, info.snd_assoc_id); - if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.snd_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { asoc->default_stream = info.snd_sid; asoc->default_flags = info.snd_flags; asoc->default_ppid = info.snd_ppid; asoc->default_context = info.snd_context; - } else { + + return 0; + } + + if (info.snd_assoc_id == SCTP_FUTURE_ASSOC || + info.snd_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.snd_sid; sp->default_flags = info.snd_flags; sp->default_ppid = info.snd_ppid; sp->default_context = info.snd_context; } + if (info.snd_assoc_id == SCTP_CURRENT_ASSOC || + info.snd_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + asoc->default_stream = info.snd_sid; + asoc->default_flags = info.snd_flags; + asoc->default_ppid = info.snd_ppid; + asoc->default_context = info.snd_context; + } + } + return 0; } @@ -3144,7 +3191,8 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id); /* Set the values to the specific association */ - if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP)) + if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; rto_max = rtoinfo.srto_max; @@ -3206,7 +3254,8 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, unsig asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id); - if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP)) + if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; /* Set the values to the specific association */ @@ -3319,7 +3368,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned current->comm, task_pid_nr(current)); if (copy_from_user(&val, optval, optlen)) return -EFAULT; - params.assoc_id = 0; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (optlen == sizeof(struct sctp_assoc_value)) { if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -3329,6 +3378,9 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned } asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; if (val) { int min_len, max_len; @@ -3346,8 +3398,6 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned asoc->user_frag = val; sctp_assoc_update_frag_point(asoc); } else { - if (params.assoc_id && sctp_style(sk, UDP)) - return -EINVAL; sp->user_frag = val; } @@ -3460,8 +3510,8 @@ static int sctp_setsockopt_adaptation_layer(struct sock *sk, char __user *optval static int sctp_setsockopt_context(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc; if (optlen != sizeof(struct sctp_assoc_value)) @@ -3469,17 +3519,26 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval, if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; - sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; - if (params.assoc_id != 0) { - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - return -EINVAL; + if (asoc) { asoc->default_rcv_context = params.assoc_value; - } else { - sp->default_rcv_context = params.assoc_value; + + return 0; } + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + sp->default_rcv_context = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &sp->ep->asocs, asocs) + asoc->default_rcv_context = params.assoc_value; + return 0; } @@ -3580,11 +3639,9 @@ static int sctp_setsockopt_maxburst(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc; - int val; - int assoc_id = 0; if (optlen == sizeof(int)) { pr_warn_ratelimited(DEPRECATED @@ -3592,25 +3649,34 @@ static int sctp_setsockopt_maxburst(struct sock *sk, "Use of int in max_burst socket option deprecated.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - if (copy_from_user(&val, optval, optlen)) + if (copy_from_user(¶ms.assoc_value, optval, optlen)) return -EFAULT; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (optlen == sizeof(struct sctp_assoc_value)) { if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; - val = params.assoc_value; - assoc_id = params.assoc_id; } else return -EINVAL; - sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; - if (assoc_id != 0) { - asoc = sctp_id2assoc(sk, assoc_id); - if (!asoc) - return -EINVAL; - asoc->max_burst = val; - } else - sp->max_burst = val; + if (asoc) { + asoc->max_burst = params.assoc_value; + + return 0; + } + + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + sp->max_burst = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &sp->ep->asocs, asocs) + asoc->max_burst = params.assoc_value; return 0; } @@ -3702,7 +3768,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk, struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkey *authkey; struct sctp_association *asoc; - int ret; + int ret = -EINVAL; if (!ep->auth_enable) return -EACCES; @@ -3712,25 +3778,44 @@ static int sctp_setsockopt_auth_key(struct sock *sk, /* authkey->sca_keylength is u16, so optlen can't be bigger than * this. */ - optlen = min_t(unsigned int, optlen, USHRT_MAX + - sizeof(struct sctp_authkey)); + optlen = min_t(unsigned int, optlen, USHRT_MAX + sizeof(*authkey)); authkey = memdup_user(optval, optlen); if (IS_ERR(authkey)) return PTR_ERR(authkey); - if (authkey->sca_keylength > optlen - sizeof(struct sctp_authkey)) { - ret = -EINVAL; + if (authkey->sca_keylength > optlen - sizeof(*authkey)) goto out; - } asoc = sctp_id2assoc(sk, authkey->sca_assoc_id); - if (!asoc && authkey->sca_assoc_id && sctp_style(sk, UDP)) { - ret = -EINVAL; + if (!asoc && authkey->sca_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + goto out; + + if (asoc) { + ret = sctp_auth_set_key(ep, asoc, authkey); goto out; } - ret = sctp_auth_set_key(ep, asoc, authkey); + if (authkey->sca_assoc_id == SCTP_FUTURE_ASSOC || + authkey->sca_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_set_key(ep, asoc, authkey); + if (ret) + goto out; + } + + ret = 0; + + if (authkey->sca_assoc_id == SCTP_CURRENT_ASSOC || + authkey->sca_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_set_key(ep, asoc, authkey); + + if (res && !ret) + ret = res; + } + } + out: kzfree(authkey); return ret; @@ -3747,8 +3832,9 @@ static int sctp_setsockopt_active_key(struct sock *sk, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkeyid val; struct sctp_association *asoc; + struct sctp_authkeyid val; + int ret = 0; if (!ep->auth_enable) return -EACCES; @@ -3759,10 +3845,32 @@ static int sctp_setsockopt_active_key(struct sock *sk, return -EFAULT; asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; - return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + if (asoc) + return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + if (ret) + return ret; + } + + if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_set_active_key(ep, asoc, + val.scact_keynumber); + + if (res && !ret) + ret = res; + } + } + + return ret; } /* @@ -3775,8 +3883,9 @@ static int sctp_setsockopt_del_key(struct sock *sk, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkeyid val; struct sctp_association *asoc; + struct sctp_authkeyid val; + int ret = 0; if (!ep->auth_enable) return -EACCES; @@ -3787,11 +3896,32 @@ static int sctp_setsockopt_del_key(struct sock *sk, return -EFAULT; asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; - return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (asoc) + return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (ret) + return ret; + } + + if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_del_key_id(ep, asoc, + val.scact_keynumber); + + if (res && !ret) + ret = res; + } + } + + return ret; } /* @@ -3803,8 +3933,9 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkeyid val; struct sctp_association *asoc; + struct sctp_authkeyid val; + int ret = 0; if (!ep->auth_enable) return -EACCES; @@ -3815,10 +3946,32 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, return -EFAULT; asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; - return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + if (asoc) + return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + if (ret) + return ret; + } + + if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_deact_key_id(ep, asoc, + val.scact_keynumber); + + if (res && !ret) + ret = res; + } + } + + return ret; } /* @@ -3884,11 +4037,25 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, sizeof(struct sctp_paddrthlds))) return -EFAULT; - - if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { - asoc = sctp_id2assoc(sk, val.spt_assoc_id); - if (!asoc) + if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { + trans = sctp_addr_id2transport(sk, &val.spt_address, + val.spt_assoc_id); + if (!trans) return -ENOENT; + + if (val.spt_pathmaxrxt) + trans->pathmaxrxt = val.spt_pathmaxrxt; + trans->pf_retrans = val.spt_pathpfthld; + + return 0; + } + + asoc = sctp_id2assoc(sk, val.spt_assoc_id); + if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { if (val.spt_pathmaxrxt) @@ -3900,14 +4067,11 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, asoc->pathmaxrxt = val.spt_pathmaxrxt; asoc->pf_retrans = val.spt_pathpfthld; } else { - trans = sctp_addr_id2transport(sk, &val.spt_address, - val.spt_assoc_id); - if (!trans) - return -ENOENT; + struct sctp_sock *sp = sctp_sk(sk); if (val.spt_pathmaxrxt) - trans->pathmaxrxt = val.spt_pathmaxrxt; - trans->pf_retrans = val.spt_pathpfthld; + sp->pathmaxrxt = val.spt_pathmaxrxt; + sp->pf_retrans = val.spt_pathpfthld; } return 0; @@ -3950,6 +4114,7 @@ static int sctp_setsockopt_pr_supported(struct sock *sk, unsigned int optlen) { struct sctp_assoc_value params; + struct sctp_association *asoc; if (optlen != sizeof(params)) return -EINVAL; @@ -3957,6 +4122,11 @@ static int sctp_setsockopt_pr_supported(struct sock *sk, if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value; return 0; @@ -3966,6 +4136,7 @@ static int sctp_setsockopt_default_prinfo(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_default_prinfo info; struct sctp_association *asoc; int retval = -EINVAL; @@ -3985,19 +4156,31 @@ static int sctp_setsockopt_default_prinfo(struct sock *sk, info.pr_value = 0; asoc = sctp_id2assoc(sk, info.pr_assoc_id); + if (!asoc && info.pr_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + goto out; + + retval = 0; + if (asoc) { SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy); asoc->default_timetolive = info.pr_value; - } else if (!info.pr_assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy); - sp->default_timetolive = info.pr_value; - } else { goto out; } - retval = 0; + if (info.pr_assoc_id == SCTP_FUTURE_ASSOC || + info.pr_assoc_id == SCTP_ALL_ASSOC) { + SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy); + sp->default_timetolive = info.pr_value; + } + + if (info.pr_assoc_id == SCTP_CURRENT_ASSOC || + info.pr_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy); + asoc->default_timetolive = info.pr_value; + } + } out: return retval; @@ -4020,15 +4203,14 @@ static int sctp_setsockopt_reconfig_supported(struct sock *sk, } asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - asoc->reconf_enable = !!params.assoc_value; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - sp->ep->reconf_enable = !!params.assoc_value; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) goto out; - } + + if (asoc) + asoc->reconf_enable = !!params.assoc_value; + else + sctp_sk(sk)->ep->reconf_enable = !!params.assoc_value; retval = 0; @@ -4040,6 +4222,7 @@ static int sctp_setsockopt_enable_strreset(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EINVAL; @@ -4056,17 +4239,25 @@ static int sctp_setsockopt_enable_strreset(struct sock *sk, goto out; asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + goto out; + + retval = 0; + if (asoc) { asoc->strreset_enable = params.assoc_value; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - sp->ep->strreset_enable = params.assoc_value; - } else { goto out; } - retval = 0; + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + ep->strreset_enable = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &ep->asocs, asocs) + asoc->strreset_enable = params.assoc_value; out: return retval; @@ -4161,29 +4352,44 @@ static int sctp_setsockopt_scheduler(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; struct sctp_assoc_value params; - int retval = -EINVAL; + int retval = 0; if (optlen < sizeof(params)) - goto out; + return -EINVAL; optlen = sizeof(params); - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; - goto out; - } + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; if (params.assoc_value > SCTP_SS_MAX) - goto out; + return -EINVAL; asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - goto out; + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; - retval = sctp_sched_set_sched(asoc, params.assoc_value); + if (asoc) + return sctp_sched_set_sched(asoc, params.assoc_value); + + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + sp->default_ss = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + int ret = sctp_sched_set_sched(asoc, + params.assoc_value); + + if (ret && !retval) + retval = ret; + } + } -out: return retval; } @@ -4191,8 +4397,8 @@ static int sctp_setsockopt_scheduler_value(struct sock *sk, char __user *optval, unsigned int optlen) { - struct sctp_association *asoc; struct sctp_stream_value params; + struct sctp_association *asoc; int retval = -EINVAL; if (optlen < sizeof(params)) @@ -4205,11 +4411,24 @@ static int sctp_setsockopt_scheduler_value(struct sock *sk, } asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) + if (!asoc && params.assoc_id != SCTP_CURRENT_ASSOC && + sctp_style(sk, UDP)) goto out; - retval = sctp_sched_set_value(asoc, params.stream_id, - params.stream_value, GFP_KERNEL); + if (asoc) { + retval = sctp_sched_set_value(asoc, params.stream_id, + params.stream_value, GFP_KERNEL); + goto out; + } + + retval = 0; + + list_for_each_entry(asoc, &sctp_sk(sk)->ep->asocs, asocs) { + int ret = sctp_sched_set_value(asoc, params.stream_id, + params.stream_value, GFP_KERNEL); + if (ret && !retval) /* try to return the 1st error. */ + retval = ret; + } out: return retval; @@ -4220,8 +4439,8 @@ static int sctp_setsockopt_interleaving_supported(struct sock *sk, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); - struct net *net = sock_net(sk); struct sctp_assoc_value params; + struct sctp_association *asoc; int retval = -EINVAL; if (optlen < sizeof(params)) @@ -4233,10 +4452,12 @@ static int sctp_setsockopt_interleaving_supported(struct sock *sk, goto out; } - if (params.assoc_id) + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) goto out; - if (!net->sctp.intl_enable || !sp->frag_interleave) { + if (!sock_net(sk)->sctp.intl_enable || !sp->frag_interleave) { retval = -EPERM; goto out; } @@ -4271,54 +4492,69 @@ static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval, return 0; } -static int sctp_setsockopt_event(struct sock *sk, char __user *optval, - unsigned int optlen) +static int sctp_assoc_ulpevent_type_set(struct sctp_event *param, + struct sctp_association *asoc) { - struct sctp_association *asoc; struct sctp_ulpevent *event; - struct sctp_event param; - int retval = 0; - if (optlen < sizeof(param)) { - retval = -EINVAL; - goto out; - } + sctp_ulpevent_type_set(&asoc->subscribe, param->se_type, param->se_on); - optlen = sizeof(param); - if (copy_from_user(¶m, optval, optlen)) { - retval = -EFAULT; - goto out; - } - - if (param.se_type < SCTP_SN_TYPE_BASE || - param.se_type > SCTP_SN_TYPE_MAX) { - retval = -EINVAL; - goto out; - } - - asoc = sctp_id2assoc(sk, param.se_assoc_id); - if (!asoc) { - sctp_ulpevent_type_set(&sctp_sk(sk)->subscribe, - param.se_type, param.se_on); - goto out; - } - - sctp_ulpevent_type_set(&asoc->subscribe, param.se_type, param.se_on); - - if (param.se_type == SCTP_SENDER_DRY_EVENT && param.se_on) { + if (param->se_type == SCTP_SENDER_DRY_EVENT && param->se_on) { if (sctp_outq_is_empty(&asoc->outqueue)) { event = sctp_ulpevent_make_sender_dry_event(asoc, GFP_USER | __GFP_NOWARN); - if (!event) { - retval = -ENOMEM; - goto out; - } + if (!event) + return -ENOMEM; asoc->stream.si->enqueue_event(&asoc->ulpq, event); } } -out: + return 0; +} + +static int sctp_setsockopt_event(struct sock *sk, char __user *optval, + unsigned int optlen) +{ + struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_event param; + int retval = 0; + + if (optlen < sizeof(param)) + return -EINVAL; + + optlen = sizeof(param); + if (copy_from_user(¶m, optval, optlen)) + return -EFAULT; + + if (param.se_type < SCTP_SN_TYPE_BASE || + param.se_type > SCTP_SN_TYPE_MAX) + return -EINVAL; + + asoc = sctp_id2assoc(sk, param.se_assoc_id); + if (!asoc && param.se_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) + return sctp_assoc_ulpevent_type_set(¶m, asoc); + + if (param.se_assoc_id == SCTP_FUTURE_ASSOC || + param.se_assoc_id == SCTP_ALL_ASSOC) + sctp_ulpevent_type_set(&sp->subscribe, + param.se_type, param.se_on); + + if (param.se_assoc_id == SCTP_CURRENT_ASSOC || + param.se_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + int ret = sctp_assoc_ulpevent_type_set(¶m, asoc); + + if (ret && !retval) + retval = ret; + } + } + return retval; } @@ -4777,12 +5013,14 @@ static int sctp_init_sock(struct sock *sk) */ sp->hbinterval = net->sctp.hb_interval; sp->pathmaxrxt = net->sctp.max_retrans_path; + sp->pf_retrans = net->sctp.pf_retrans; sp->pathmtu = 0; /* allow default discovery */ sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; + sp->default_ss = SCTP_SS_DEFAULT; /* If enabled no SCTP message fragmentation will be performed. * Configure through SCTP_DISABLE_FRAGMENTS socket option. @@ -5676,12 +5914,13 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, } } - /* Get association, if assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.spp_assoc_id); - if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) { + if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { pr_debug("%s: failed no association\n", __func__); return -EINVAL; } @@ -5810,19 +6049,19 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, } else return -EINVAL; - /* Get association, if sack_assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.sack_assoc_id); - if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.sack_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; if (asoc) { /* Fetch association values. */ if (asoc->param_flags & SPP_SACKDELAY_ENABLE) { - params.sack_delay = jiffies_to_msecs( - asoc->sackdelay); + params.sack_delay = jiffies_to_msecs(asoc->sackdelay); params.sack_freq = asoc->sackfreq; } else { @@ -6175,8 +6414,10 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, return -EFAULT; asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); - if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.sinfo_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { info.sinfo_stream = asoc->default_stream; info.sinfo_flags = asoc->default_flags; @@ -6219,8 +6460,10 @@ static int sctp_getsockopt_default_sndinfo(struct sock *sk, int len, return -EFAULT; asoc = sctp_id2assoc(sk, info.snd_assoc_id); - if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.snd_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { info.snd_sid = asoc->default_stream; info.snd_flags = asoc->default_flags; @@ -6296,7 +6539,8 @@ static int sctp_getsockopt_rtoinfo(struct sock *sk, int len, asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id); - if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP)) + if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; /* Values corresponding to the specific association. */ @@ -6353,7 +6597,8 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len, asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id); - if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP)) + if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; /* Values correspoinding to the specific association */ @@ -6428,7 +6673,6 @@ static int sctp_getsockopt_context(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc; if (len < sizeof(struct sctp_assoc_value)) @@ -6439,16 +6683,13 @@ static int sctp_getsockopt_context(struct sock *sk, int len, if (copy_from_user(¶ms, optval, len)) return -EFAULT; - sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; - if (params.assoc_id != 0) { - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - return -EINVAL; - params.assoc_value = asoc->default_rcv_context; - } else { - params.assoc_value = sp->default_rcv_context; - } + params.assoc_value = asoc ? asoc->default_rcv_context + : sctp_sk(sk)->default_rcv_context; if (put_user(len, optlen)) return -EFAULT; @@ -6497,7 +6738,7 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, "Use of int in maxseg socket option.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - params.assoc_id = 0; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); if (copy_from_user(¶ms, optval, len)) @@ -6506,7 +6747,8 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, return -EINVAL; asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; if (asoc) @@ -6583,7 +6825,6 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, int __user *optlen) { struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc; if (len == sizeof(int)) { @@ -6592,7 +6833,7 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, "Use of int in max_burst socket option.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - params.assoc_id = 0; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); if (copy_from_user(¶ms, optval, len)) @@ -6600,15 +6841,12 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, } else return -EINVAL; - sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; - if (params.assoc_id != 0) { - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - return -EINVAL; - params.assoc_value = asoc->max_burst; - } else - params.assoc_value = sp->max_burst; + params.assoc_value = asoc ? asoc->max_burst : sctp_sk(sk)->max_burst; if (len == sizeof(int)) { if (copy_to_user(optval, ¶ms.assoc_value, len)) @@ -6759,14 +6997,12 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, to = p->gauth_chunks; asoc = sctp_id2assoc(sk, val.gauth_assoc_id); - if (!asoc && val.gauth_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.gauth_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; - if (asoc) - ch = (struct sctp_chunks_param *)asoc->c.auth_chunks; - else - ch = ep->auth_chunk_list; - + ch = asoc ? (struct sctp_chunks_param *)asoc->c.auth_chunks + : ep->auth_chunk_list; if (!ch) goto num; @@ -6911,14 +7147,7 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk, if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len)) return -EFAULT; - if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { - asoc = sctp_id2assoc(sk, val.spt_assoc_id); - if (!asoc) - return -ENOENT; - - val.spt_pathpfthld = asoc->pf_retrans; - val.spt_pathmaxrxt = asoc->pathmaxrxt; - } else { + if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { trans = sctp_addr_id2transport(sk, &val.spt_address, val.spt_assoc_id); if (!trans) @@ -6926,6 +7155,23 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk, val.spt_pathmaxrxt = trans->pathmaxrxt; val.spt_pathpfthld = trans->pf_retrans; + + return 0; + } + + asoc = sctp_id2assoc(sk, val.spt_assoc_id); + if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) { + val.spt_pathpfthld = asoc->pf_retrans; + val.spt_pathmaxrxt = asoc->pathmaxrxt; + } else { + struct sctp_sock *sp = sctp_sk(sk); + + val.spt_pathpfthld = sp->pf_retrans; + val.spt_pathmaxrxt = sp->pathmaxrxt; } if (put_user(len, optlen) || copy_to_user(optval, &val, len)) @@ -7056,17 +7302,15 @@ static int sctp_getsockopt_pr_supported(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->prsctp_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->ep->prsctp_enable; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } + params.assoc_value = asoc ? asoc->prsctp_enable + : sctp_sk(sk)->ep->prsctp_enable; + if (put_user(len, optlen)) goto out; @@ -7097,17 +7341,20 @@ static int sctp_getsockopt_default_prinfo(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, info.pr_assoc_id); + if (!asoc && info.pr_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + retval = -EINVAL; + goto out; + } + if (asoc) { info.pr_policy = SCTP_PR_POLICY(asoc->default_flags); info.pr_value = asoc->default_timetolive; - } else if (!info.pr_assoc_id) { + } else { struct sctp_sock *sp = sctp_sk(sk); info.pr_policy = SCTP_PR_POLICY(sp->default_flags); info.pr_value = sp->default_timetolive; - } else { - retval = -EINVAL; - goto out; } if (put_user(len, optlen)) @@ -7263,17 +7510,15 @@ static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->reconf_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->ep->reconf_enable; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } + params.assoc_value = asoc ? asoc->reconf_enable + : sctp_sk(sk)->ep->reconf_enable; + if (put_user(len, optlen)) goto out; @@ -7304,17 +7549,15 @@ static int sctp_getsockopt_enable_strreset(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->strreset_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->ep->strreset_enable; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } + params.assoc_value = asoc ? asoc->strreset_enable + : sctp_sk(sk)->ep->strreset_enable; + if (put_user(len, optlen)) goto out; @@ -7345,12 +7588,14 @@ static int sctp_getsockopt_scheduler(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } - params.assoc_value = sctp_sched_get_sched(asoc); + params.assoc_value = asoc ? sctp_sched_get_sched(asoc) + : sctp_sk(sk)->default_ss; if (put_user(len, optlen)) goto out; @@ -7424,17 +7669,15 @@ static int sctp_getsockopt_interleaving_supported(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->intl_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->strm_interleave; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } + params.assoc_value = asoc ? asoc->intl_enable + : sctp_sk(sk)->strm_interleave; + if (put_user(len, optlen)) goto out; @@ -7486,6 +7729,10 @@ static int sctp_getsockopt_event(struct sock *sk, int len, char __user *optval, return -EINVAL; asoc = sctp_id2assoc(sk, param.se_assoc_id); + if (!asoc && param.se_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + subscribe = asoc ? asoc->subscribe : sctp_sk(sk)->subscribe; param.se_on = sctp_ulpevent_type_enabled(subscribe, param.se_type); diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 3892e7630f3a..80e0ae5534ec 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -585,9 +585,9 @@ struct sctp_chunk *sctp_process_strreset_outreq( struct sctp_strreset_outreq *outreq = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; - __u16 i, nums, flags = 0; __be16 *str_p = NULL; __u32 request_seq; + __u16 i, nums; request_seq = ntohl(outreq->request_seq); @@ -615,6 +615,15 @@ struct sctp_chunk *sctp_process_strreset_outreq( if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_STREAM_REQ)) goto out; + nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16); + str_p = outreq->list_of_streams; + for (i = 0; i < nums; i++) { + if (ntohs(str_p[i]) >= stream->incnt) { + result = SCTP_STRRESET_ERR_WRONG_SSN; + goto out; + } + } + if (asoc->strreset_chunk) { if (!sctp_chunk_lookup_strreset_param( asoc, outreq->response_seq, @@ -637,32 +646,19 @@ struct sctp_chunk *sctp_process_strreset_outreq( sctp_chunk_put(asoc->strreset_chunk); asoc->strreset_chunk = NULL; } - - flags = SCTP_STREAM_RESET_INCOMING_SSN; } - nums = (ntohs(param.p->length) - sizeof(*outreq)) / sizeof(__u16); - if (nums) { - str_p = outreq->list_of_streams; - for (i = 0; i < nums; i++) { - if (ntohs(str_p[i]) >= stream->incnt) { - result = SCTP_STRRESET_ERR_WRONG_SSN; - goto out; - } - } - + if (nums) for (i = 0; i < nums; i++) SCTP_SI(stream, ntohs(str_p[i]))->mid = 0; - } else { + else for (i = 0; i < stream->incnt; i++) SCTP_SI(stream, i)->mid = 0; - } result = SCTP_STRRESET_PERFORMED; *evp = sctp_ulpevent_make_stream_reset_event(asoc, - flags | SCTP_STREAM_RESET_OUTGOING_SSN, nums, str_p, - GFP_ATOMIC); + SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC); out: sctp_update_strreset_result(asoc, result); @@ -738,9 +734,6 @@ struct sctp_chunk *sctp_process_strreset_inreq( result = SCTP_STRRESET_PERFORMED; - *evp = sctp_ulpevent_make_stream_reset_event(asoc, - SCTP_STREAM_RESET_INCOMING_SSN, nums, str_p, GFP_ATOMIC); - out: sctp_update_strreset_result(asoc, result); err: @@ -873,6 +866,14 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out( if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ)) goto out; + in = ntohs(addstrm->number_of_streams); + incnt = stream->incnt + in; + if (!in || incnt > SCTP_MAX_STREAM) + goto out; + + if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC)) + goto out; + if (asoc->strreset_chunk) { if (!sctp_chunk_lookup_strreset_param( asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) { @@ -896,14 +897,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out( } } - in = ntohs(addstrm->number_of_streams); - incnt = stream->incnt + in; - if (!in || incnt > SCTP_MAX_STREAM) - goto out; - - if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC)) - goto out; - stream->incnt = incnt; result = SCTP_STRRESET_PERFORMED; @@ -973,9 +966,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in( result = SCTP_STRRESET_PERFORMED; - *evp = sctp_ulpevent_make_stream_change_event(asoc, - 0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC); - out: sctp_update_strreset_result(asoc, result); err: @@ -1036,10 +1026,10 @@ struct sctp_chunk *sctp_process_strreset_resp( sout->mid_uo = 0; } } - - flags = SCTP_STREAM_RESET_OUTGOING_SSN; } + flags |= SCTP_STREAM_RESET_OUTGOING_SSN; + for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; @@ -1058,6 +1048,8 @@ struct sctp_chunk *sctp_process_strreset_resp( nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / sizeof(__u16); + flags |= SCTP_STREAM_RESET_INCOMING_SSN; + *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags, nums, str_p, GFP_ATOMIC); } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) { diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 1ff9768f5456..f3023bbc0b7f 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -41,6 +41,9 @@ static unsigned long number_cred_unused; static struct cred machine_cred = { .usage = ATOMIC_INIT(1), +#ifdef CONFIG_DEBUG_CREDENTIALS + .magic = CRED_MAGIC, +#endif }; /* diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index dc86713b32b6..1531b0219344 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1549,8 +1549,10 @@ gss_marshal(struct rpc_task *task, __be32 *p) cred_len = p++; spin_lock(&ctx->gc_seq_lock); - req->rq_seqno = ctx->gc_seq++; + req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ; spin_unlock(&ctx->gc_seq_lock); + if (req->rq_seqno == MAXSEQ) + goto out_expired; *p++ = htonl((u32) RPC_GSS_VERSION); *p++ = htonl((u32) ctx->gc_proc); @@ -1572,14 +1574,18 @@ gss_marshal(struct rpc_task *task, __be32 *p) mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) { - clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + goto out_expired; } else if (maj_stat != 0) { - printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); + pr_warn("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); + task->tk_status = -EIO; goto out_put_ctx; } p = xdr_encode_opaque(p, NULL, mic.len); gss_put_ctx(ctx); return p; +out_expired: + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + task->tk_status = -EKEYEXPIRED; out_put_ctx: gss_put_ctx(ctx); return NULL; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 71d9599b5816..d7ec6132c046 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1739,14 +1739,10 @@ rpc_xdr_encode(struct rpc_task *task) xdr_buf_init(&req->rq_rcv_buf, req->rq_rbuffer, req->rq_rcvsize); - req->rq_bytes_sent = 0; p = rpc_encode_header(task); - if (p == NULL) { - printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n"); - rpc_exit(task, -EIO); + if (p == NULL) return; - } encode = task->tk_msg.rpc_proc->p_encode; if (encode == NULL) @@ -1771,10 +1767,17 @@ call_encode(struct rpc_task *task) /* Did the encode result in an error condition? */ if (task->tk_status != 0) { /* Was the error nonfatal? */ - if (task->tk_status == -EAGAIN || task->tk_status == -ENOMEM) + switch (task->tk_status) { + case -EAGAIN: + case -ENOMEM: rpc_delay(task, HZ >> 4); - else + break; + case -EKEYEXPIRED: + task->tk_action = call_refresh; + break; + default: rpc_exit(task, task->tk_status); + } return; } @@ -2336,7 +2339,8 @@ rpc_encode_header(struct rpc_task *task) *p++ = htonl(clnt->cl_vers); /* program version */ *p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */ p = rpcauth_marshcred(task, p); - req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p); + if (p) + req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p); return p; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 73547d17d3c6..f1ec2110efeb 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1151,6 +1151,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task) struct rpc_xprt *xprt = req->rq_xprt; if (xprt_request_need_enqueue_transmit(task, req)) { + req->rq_bytes_sent = 0; spin_lock(&xprt->queue_lock); /* * Requests that carry congestion control credits are added @@ -1177,7 +1178,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task) INIT_LIST_HEAD(&req->rq_xmit2); goto out; } - } else { + } else if (!req->rq_seqno) { list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { if (pos->rq_task->tk_owner != task->tk_owner) continue; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 7749a2bf6887..4994e75945b8 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -845,17 +845,13 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) for (i = 0; i <= buf->rb_sc_last; i++) { sc = rpcrdma_sendctx_create(&r_xprt->rx_ia); if (!sc) - goto out_destroy; + return -ENOMEM; sc->sc_xprt = r_xprt; buf->rb_sc_ctxs[i] = sc; } return 0; - -out_destroy: - rpcrdma_sendctxs_destroy(buf); - return -ENOMEM; } /* The sendctx queue is not guaranteed to have a size that is a @@ -1113,8 +1109,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) WQ_MEM_RECLAIM | WQ_HIGHPRI, 0, r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]); - if (!buf->rb_completion_wq) + if (!buf->rb_completion_wq) { + rc = -ENOMEM; goto out; + } return 0; out: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 13559e6a460b..7754aa3e434f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -376,6 +377,26 @@ xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, return sock_recvmsg(sock, msg, flags); } +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +static void +xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek) +{ + struct bvec_iter bi = { + .bi_size = count, + }; + struct bio_vec bv; + + bvec_iter_advance(bvec, &bi, seek & PAGE_MASK); + for_each_bvec(bv, bvec, bi, bi) + flush_dcache_page(bv.bv_page); +} +#else +static inline void +xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek) +{ +} +#endif + static ssize_t xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, struct xdr_buf *buf, size_t count, size_t seek, size_t *read) @@ -409,6 +430,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, seek + buf->page_base); if (ret <= 0) goto sock_err; + xs_flush_bvec(buf->bvec, ret, seek + buf->page_base); offset += ret - buf->page_base; if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) goto out; diff --git a/net/tipc/link.c b/net/tipc/link.c index 2792a3cae682..ac306d17f8ad 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1126,7 +1126,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, skb_queue_tail(mc_inputq, skb); return true; } - /* else: fall through */ + /* fall through */ case CONN_MANAGER: skb_queue_tail(inputq, skb); return true; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 77e4b2418f30..4ad3586da8f0 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -87,6 +87,11 @@ static int tipc_skb_tailroom(struct sk_buff *skb) return limit; } +static inline int TLV_GET_DATA_LEN(struct tlv_desc *tlv) +{ + return TLV_GET_LEN(tlv) - TLV_SPACE(0); +} + static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) { struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); @@ -166,6 +171,11 @@ static struct sk_buff *tipc_get_err_tlv(char *str) return buf; } +static inline bool string_is_valid(char *s, int len) +{ + return memchr(s, '\0', len) ? true : false; +} + static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, struct tipc_nl_compat_msg *msg, struct sk_buff *arg) @@ -379,6 +389,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, struct nlattr *prop; struct nlattr *bearer; struct tipc_bearer_config *b; + int len; b = (struct tipc_bearer_config *)TLV_DATA(msg->req); @@ -386,6 +397,10 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + if (!string_is_valid(b->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) return -EMSGSIZE; @@ -411,6 +426,7 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, { char *name; struct nlattr *bearer; + int len; name = (char *)TLV_DATA(msg->req); @@ -418,6 +434,10 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) return -EMSGSIZE; @@ -478,6 +498,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; int err; + int len; if (!attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -504,6 +525,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, return err; name = (char *)TLV_DATA(msg->req); + + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) return 0; @@ -644,6 +670,7 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, struct nlattr *prop; struct nlattr *media; struct tipc_link_config *lc; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); @@ -651,6 +678,10 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, if (!media) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name)) return -EMSGSIZE; @@ -671,6 +702,7 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, struct nlattr *prop; struct nlattr *bearer; struct tipc_link_config *lc; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); @@ -678,6 +710,10 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name)) return -EMSGSIZE; @@ -726,9 +762,14 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, struct tipc_link_config *lc; struct tipc_bearer *bearer; struct tipc_media *media; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + media = tipc_media_find(lc->name); if (media) { cmd->doit = &__tipc_nl_media_set; @@ -750,6 +791,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, { char *name; struct nlattr *link; + int len; name = (char *)TLV_DATA(msg->req); @@ -757,6 +799,10 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, if (!link) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name)) return -EMSGSIZE; @@ -778,6 +824,8 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) }; ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query)) + return -EINVAL; depth = ntohl(ntq->depth); @@ -1208,7 +1256,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) } len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); - if (len && !TLV_OK(msg.req, len)) { + if (!len || !TLV_OK(msg.req, len)) { msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); err = -EOPNOTSUPP; goto send; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1217c90a363b..8fc5acd4820d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -735,7 +735,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) revents |= EPOLLOUT; - /* fall thru' */ + /* fall through */ case TIPC_LISTEN: if (!skb_queue_empty(&sk->sk_receive_queue)) revents |= EPOLLIN | EPOLLRDNORM; @@ -2416,7 +2416,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, * case is EINPROGRESS, rather than EALREADY. */ res = -EINPROGRESS; - /* fall thru' */ + /* fall through */ case TIPC_CONNECTING: if (!timeout) { if (previous == TIPC_CONNECTING) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index efb16f69bd2c..4a708a4e8583 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -60,7 +60,6 @@ * @awork: accept work item * @rcv_wq: receive workqueue * @send_wq: send workqueue - * @max_rcvbuf_size: maximum permitted receive message length * @listener: topsrv listener socket * @name: server name */ @@ -72,7 +71,6 @@ struct tipc_topsrv { struct work_struct awork; struct workqueue_struct *rcv_wq; struct workqueue_struct *send_wq; - int max_rcvbuf_size; struct socket *listener; char name[TIPC_SERVER_NAME_LEN]; }; @@ -398,7 +396,7 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con) ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); if (ret == -EWOULDBLOCK) return -EWOULDBLOCK; - if (ret > 0) { + if (ret == sizeof(s)) { read_lock_bh(&sk->sk_callback_lock); ret = tipc_conn_rcv_sub(srv, con, &s); read_unlock_bh(&sk->sk_callback_lock); @@ -648,7 +646,6 @@ int tipc_topsrv_start(struct net *net) return -ENOMEM; srv->net = net; - srv->max_rcvbuf_size = sizeof(struct tipc_subscr); INIT_WORK(&srv->awork, tipc_topsrv_accept); strscpy(srv->name, name, sizeof(srv->name)); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 78cb4a584080..d36d095cbcf0 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -61,6 +61,8 @@ static LIST_HEAD(device_list); static DEFINE_SPINLOCK(device_spinlock); static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG]; static struct proto_ops tls_sw_proto_ops; +static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], + struct proto *base); static void update_sk_prot(struct sock *sk, struct tls_context *ctx) { @@ -264,8 +266,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) lock_sock(sk); sk_proto_close = ctx->sk_proto_close; - if ((ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD) || - (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE)) { + if (ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD) + goto skip_tx_cleanup; + + if (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE) { free_ctx = true; goto skip_tx_cleanup; } @@ -551,6 +555,43 @@ static struct tls_context *create_ctx(struct sock *sk) return ctx; } +static void tls_build_proto(struct sock *sk) +{ + int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; + + /* Build IPv6 TLS whenever the address of tcpv6 _prot changes */ + if (ip_ver == TLSV6 && + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { + mutex_lock(&tcpv6_prot_mutex); + if (likely(sk->sk_prot != saved_tcpv6_prot)) { + build_protos(tls_prots[TLSV6], sk->sk_prot); + smp_store_release(&saved_tcpv6_prot, sk->sk_prot); + } + mutex_unlock(&tcpv6_prot_mutex); + } + + if (ip_ver == TLSV4 && + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv4_prot))) { + mutex_lock(&tcpv4_prot_mutex); + if (likely(sk->sk_prot != saved_tcpv4_prot)) { + build_protos(tls_prots[TLSV4], sk->sk_prot); + smp_store_release(&saved_tcpv4_prot, sk->sk_prot); + } + mutex_unlock(&tcpv4_prot_mutex); + } +} + +static void tls_hw_sk_destruct(struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + + ctx->sk_destruct(sk); + /* Free ctx */ + kfree(ctx); + icsk->icsk_ulp_data = NULL; +} + static int tls_hw_prot(struct sock *sk) { struct tls_context *ctx; @@ -564,12 +605,17 @@ static int tls_hw_prot(struct sock *sk) if (!ctx) goto out; + spin_unlock_bh(&device_spinlock); + tls_build_proto(sk); ctx->hash = sk->sk_prot->hash; ctx->unhash = sk->sk_prot->unhash; ctx->sk_proto_close = sk->sk_prot->close; + ctx->sk_destruct = sk->sk_destruct; + sk->sk_destruct = tls_hw_sk_destruct; ctx->rx_conf = TLS_HW_RECORD; ctx->tx_conf = TLS_HW_RECORD; update_sk_prot(sk, ctx); + spin_lock_bh(&device_spinlock); rc = 1; break; } @@ -668,7 +714,6 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], static int tls_init(struct sock *sk) { - int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; struct tls_context *ctx; int rc = 0; @@ -691,27 +736,7 @@ static int tls_init(struct sock *sk) goto out; } - /* Build IPv6 TLS whenever the address of tcpv6 _prot changes */ - if (ip_ver == TLSV6 && - unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { - mutex_lock(&tcpv6_prot_mutex); - if (likely(sk->sk_prot != saved_tcpv6_prot)) { - build_protos(tls_prots[TLSV6], sk->sk_prot); - smp_store_release(&saved_tcpv6_prot, sk->sk_prot); - } - mutex_unlock(&tcpv6_prot_mutex); - } - - if (ip_ver == TLSV4 && - unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv4_prot))) { - mutex_lock(&tcpv4_prot_mutex); - if (likely(sk->sk_prot != saved_tcpv4_prot)) { - build_protos(tls_prots[TLSV4], sk->sk_prot); - smp_store_release(&saved_tcpv4_prot, sk->sk_prot); - } - mutex_unlock(&tcpv4_prot_mutex); - } - + tls_build_proto(sk); ctx->tx_conf = TLS_BASE; ctx->rx_conf = TLS_BASE; update_sk_prot(sk, ctx); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 86b9527c4826..3f2a6af27e62 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -447,6 +447,8 @@ static int tls_do_encryption(struct sock *sk, struct scatterlist *sge = sk_msg_elem(msg_en, start); int rc; + memcpy(rec->iv_data, tls_ctx->tx.iv, sizeof(rec->iv_data)); + sge->offset += tls_ctx->tx.prepend_size; sge->length -= tls_ctx->tx.prepend_size; @@ -456,7 +458,7 @@ static int tls_do_encryption(struct sock *sk, aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); aead_request_set_crypt(aead_req, rec->sg_aead_in, rec->sg_aead_out, - data_len, tls_ctx->tx.iv); + data_len, rec->iv_data); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_encrypt_done, sk); @@ -1901,7 +1903,9 @@ void tls_sw_free_resources_tx(struct sock *sk) if (atomic_read(&ctx->encrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + release_sock(sk); cancel_delayed_work_sync(&ctx->tx_work.work); + lock_sock(sk); /* Tx whatever records we can transmit and abandon the rest */ tls_tx_records(sk, -1); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index af89e5c9fd0a..ed53f8332ec8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -555,7 +555,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { }, [NL80211_ATTR_TIMEOUT] = NLA_POLICY_MIN(NLA_U32, 1), [NL80211_ATTR_PEER_MEASUREMENTS] = - NLA_POLICY_NESTED(NL80211_PMSR_FTM_REQ_ATTR_MAX, + NLA_POLICY_NESTED(NL80211_PMSR_ATTR_MAX, nl80211_pmsr_attr_policy), [NL80211_ATTR_AIRTIME_WEIGHT] = NLA_POLICY_MIN(NLA_U16, 1), }; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 7def1ece22b2..adfa58fa6536 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1024,8 +1024,13 @@ static void regdb_fw_cb(const struct firmware *fw, void *context) } rtnl_lock(); - if (WARN_ON(regdb && !IS_ERR(regdb))) { - /* just restore and free new db */ + if (regdb && !IS_ERR(regdb)) { + /* negative case - a bug + * positive case - can happen due to race in case of multiple cb's in + * queue, due to usage of asynchronous callback + * + * Either case, just restore and free new db. + */ } else if (set_error) { regdb = ERR_PTR(set_error); } else if (fw) { @@ -1255,7 +1260,7 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) * definitions (the "2.4 GHz band", the "5 GHz band" and the "60GHz band"), * however it is safe for now to assume that a frequency rule should not be * part of a frequency's band if the start freq or end freq are off by more - * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 10 GHz for the + * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 20 GHz for the * 60 GHz band. * This resolution can be lowered and should be considered as we add * regulatory rule support for other "bands". @@ -1270,7 +1275,7 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, * with the Channel starting frequency above 45 GHz. */ u32 limit = freq_khz > 45 * ONE_GHZ_IN_KHZ ? - 10 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ; + 20 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ; if (abs(freq_khz - freq_range->start_freq_khz) <= limit) return true; if (abs(freq_khz - freq_range->end_freq_khz) <= limit) diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig index 90e4a7152854..0255b33cff4b 100644 --- a/net/xdp/Kconfig +++ b/net/xdp/Kconfig @@ -5,3 +5,11 @@ config XDP_SOCKETS help XDP sockets allows a channel between XDP programs and userspace applications. + +config XDP_SOCKETS_DIAG + tristate "XDP sockets: monitoring interface" + depends on XDP_SOCKETS + default n + help + Support for PF_XDP sockets monitoring interface used by the ss tool. + If unsure, say Y. diff --git a/net/xdp/Makefile b/net/xdp/Makefile index 04f073146256..59dbfdf93dca 100644 --- a/net/xdp/Makefile +++ b/net/xdp/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o +obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index a264cf2accd0..5ab236c5c9a5 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -13,12 +13,15 @@ #include #include #include +#include #include "xdp_umem.h" #include "xsk_queue.h" #define XDP_UMEM_MIN_CHUNK_SIZE 2048 +static DEFINE_IDA(umem_ida); + void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) { unsigned long flags; @@ -41,13 +44,20 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) * not know if the device has more tx queues than rx, or the opposite. * This might also change during run time. */ -static void xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem, - u16 queue_id) +static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem, + u16 queue_id) { + if (queue_id >= max_t(unsigned int, + dev->real_num_rx_queues, + dev->real_num_tx_queues)) + return -EINVAL; + if (queue_id < dev->real_num_rx_queues) dev->_rx[queue_id].umem = umem; if (queue_id < dev->real_num_tx_queues) dev->_tx[queue_id].umem = umem; + + return 0; } struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, @@ -60,6 +70,7 @@ struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, return NULL; } +EXPORT_SYMBOL(xdp_get_umem_from_qid); static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id) { @@ -88,7 +99,10 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, goto out_rtnl_unlock; } - xdp_reg_umem_at_qid(dev, umem, queue_id); + err = xdp_reg_umem_at_qid(dev, umem, queue_id); + if (err) + goto out_rtnl_unlock; + umem->dev = dev; umem->queue_id = queue_id; if (force_copy) @@ -183,6 +197,8 @@ static void xdp_umem_release(struct xdp_umem *umem) xdp_umem_clear_dev(umem); + ida_simple_remove(&umem_ida, umem->id); + if (umem->fq) { xskq_destroy(umem->fq); umem->fq = NULL; @@ -389,8 +405,16 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) if (!umem) return ERR_PTR(-ENOMEM); + err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL); + if (err < 0) { + kfree(umem); + return ERR_PTR(err); + } + umem->id = err; + err = xdp_umem_reg(umem, mr); if (err) { + ida_simple_remove(&umem_ida, umem->id); kfree(umem); return ERR_PTR(err); } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index a03268454a27..949d3bbccb2f 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -27,14 +27,10 @@ #include "xsk_queue.h" #include "xdp_umem.h" +#include "xsk.h" #define TX_BATCH_SIZE 16 -static struct xdp_sock *xdp_sk(struct sock *sk) -{ - return (struct xdp_sock *)sk; -} - bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) { return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) && @@ -350,6 +346,10 @@ static int xsk_release(struct socket *sock) net = sock_net(sk); + mutex_lock(&net->xdp.lock); + sk_del_node_init_rcu(sk); + mutex_unlock(&net->xdp.lock); + local_bh_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); local_bh_enable(); @@ -746,6 +746,10 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, mutex_init(&xs->mutex); spin_lock_init(&xs->tx_completion_lock); + mutex_lock(&net->xdp.lock); + sk_add_node_rcu(sk, &net->xdp.list); + mutex_unlock(&net->xdp.lock); + local_bh_disable(); sock_prot_inuse_add(net, &xsk_proto, 1); local_bh_enable(); @@ -759,6 +763,23 @@ static const struct net_proto_family xsk_family_ops = { .owner = THIS_MODULE, }; +static int __net_init xsk_net_init(struct net *net) +{ + mutex_init(&net->xdp.lock); + INIT_HLIST_HEAD(&net->xdp.list); + return 0; +} + +static void __net_exit xsk_net_exit(struct net *net) +{ + WARN_ON_ONCE(!hlist_empty(&net->xdp.list)); +} + +static struct pernet_operations xsk_net_ops = { + .init = xsk_net_init, + .exit = xsk_net_exit, +}; + static int __init xsk_init(void) { int err; @@ -771,8 +792,13 @@ static int __init xsk_init(void) if (err) goto out_proto; + err = register_pernet_subsys(&xsk_net_ops); + if (err) + goto out_sk; return 0; +out_sk: + sock_unregister(PF_XDP); out_proto: proto_unregister(&xsk_proto); out: diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h new file mode 100644 index 000000000000..ba8120610426 --- /dev/null +++ b/net/xdp/xsk.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2019 Intel Corporation. */ + +#ifndef XSK_H_ +#define XSK_H_ + +static inline struct xdp_sock *xdp_sk(struct sock *sk) +{ + return (struct xdp_sock *)sk; +} + +#endif /* XSK_H_ */ diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c new file mode 100644 index 000000000000..661d007c3b28 --- /dev/null +++ b/net/xdp/xsk_diag.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 +/* XDP sockets monitoring support + * + * Copyright(c) 2019 Intel Corporation. + * + * Author: Björn Töpel + */ + +#include +#include +#include +#include + +#include "xsk_queue.h" +#include "xsk.h" + +static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb) +{ + struct xdp_diag_info di = {}; + + di.ifindex = xs->dev ? xs->dev->ifindex : 0; + di.queue_id = xs->queue_id; + return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di); +} + +static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type, + struct sk_buff *nlskb) +{ + struct xdp_diag_ring dr = {}; + + dr.entries = queue->nentries; + return nla_put(nlskb, nl_type, sizeof(dr), &dr); +} + +static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs, + struct sk_buff *nlskb) +{ + int err = 0; + + if (xs->rx) + err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb); + if (!err && xs->tx) + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb); + return err; +} + +static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) +{ + struct xdp_umem *umem = xs->umem; + struct xdp_diag_umem du = {}; + int err; + + if (!umem) + return 0; + + du.id = umem->id; + du.size = umem->size; + du.num_pages = umem->npgs; + du.chunk_size = (__u32)(~umem->chunk_mask + 1); + du.headroom = umem->headroom; + du.ifindex = umem->dev ? umem->dev->ifindex : 0; + du.queue_id = umem->queue_id; + du.flags = 0; + if (umem->zc) + du.flags |= XDP_DU_F_ZEROCOPY; + du.refs = refcount_read(&umem->users); + + err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du); + + if (!err && umem->fq) + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb); + if (!err && umem->cq) { + err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING, + nlskb); + } + return err; +} + +static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, + struct xdp_diag_req *req, + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags, int sk_ino) +{ + struct xdp_sock *xs = xdp_sk(sk); + struct xdp_diag_msg *msg; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg), + flags); + if (!nlh) + return -EMSGSIZE; + + msg = nlmsg_data(nlh); + memset(msg, 0, sizeof(*msg)); + msg->xdiag_family = AF_XDP; + msg->xdiag_type = sk->sk_type; + msg->xdiag_ino = sk_ino; + sock_diag_save_cookie(sk, msg->xdiag_cookie); + + if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb)) + goto out_nlmsg_trim; + + if ((req->xdiag_show & XDP_SHOW_INFO) && + nla_put_u32(nlskb, XDP_DIAG_UID, + from_kuid_munged(user_ns, sock_i_uid(sk)))) + goto out_nlmsg_trim; + + if ((req->xdiag_show & XDP_SHOW_RING_CFG) && + xsk_diag_put_rings_cfg(xs, nlskb)) + goto out_nlmsg_trim; + + if ((req->xdiag_show & XDP_SHOW_UMEM) && + xsk_diag_put_umem(xs, nlskb)) + goto out_nlmsg_trim; + + if ((req->xdiag_show & XDP_SHOW_MEMINFO) && + sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO)) + goto out_nlmsg_trim; + + nlmsg_end(nlskb, nlh); + return 0; + +out_nlmsg_trim: + nlmsg_cancel(nlskb, nlh); + return -EMSGSIZE; +} + +static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb) +{ + struct xdp_diag_req *req = nlmsg_data(cb->nlh); + struct net *net = sock_net(nlskb->sk); + int num = 0, s_num = cb->args[0]; + struct sock *sk; + + mutex_lock(&net->xdp.lock); + + sk_for_each(sk, &net->xdp.list) { + if (!net_eq(sock_net(sk), net)) + continue; + if (num++ < s_num) + continue; + + if (xsk_diag_fill(sk, nlskb, req, + sk_user_ns(NETLINK_CB(cb->skb).sk), + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + num--; + break; + } + } + + mutex_unlock(&net->xdp.lock); + cb->args[0] = num; + return nlskb->len; +} + +static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr) +{ + struct netlink_dump_control c = { .dump = xsk_diag_dump }; + int hdrlen = sizeof(struct xdp_diag_req); + struct net *net = sock_net(nlskb->sk); + + if (nlmsg_len(hdr) < hdrlen) + return -EINVAL; + + if (!(hdr->nlmsg_flags & NLM_F_DUMP)) + return -EOPNOTSUPP; + + return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c); +} + +static const struct sock_diag_handler xsk_diag_handler = { + .family = AF_XDP, + .dump = xsk_diag_handler_dump, +}; + +static int __init xsk_diag_init(void) +{ + return sock_diag_register(&xsk_diag_handler); +} + +static void __exit xsk_diag_exit(void) +{ + sock_diag_unregister(&xsk_diag_handler); +} + +module_init(xsk_diag_init); +module_exit(xsk_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 934492bad8e0..ba0a4048c846 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -680,16 +680,6 @@ static void xfrm_hash_resize(struct work_struct *work) mutex_unlock(&hash_resize_mutex); } -static void xfrm_hash_reset_inexact_table(struct net *net) -{ - struct xfrm_pol_inexact_bin *b; - - lockdep_assert_held(&net->xfrm.xfrm_policy_lock); - - list_for_each_entry(b, &net->xfrm.inexact_bins, inexact_bins) - INIT_HLIST_HEAD(&b->hhead); -} - /* Make sure *pol can be inserted into fastbin. * Useful to check that later insert requests will be sucessful * (provided xfrm_policy_lock is held throughout). @@ -833,13 +823,13 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net, u16 family) { unsigned int matched_s, matched_d; - struct hlist_node *newpos = NULL; struct xfrm_policy *policy, *p; matched_s = 0; matched_d = 0; list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { + struct hlist_node *newpos = NULL; bool matches_s, matches_d; if (!policy->bydst_reinsert) @@ -849,16 +839,19 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net, policy->bydst_reinsert = false; hlist_for_each_entry(p, &n->hhead, bydst) { - if (policy->priority >= p->priority) + if (policy->priority > p->priority) + newpos = &p->bydst; + else if (policy->priority == p->priority && + policy->pos > p->pos) newpos = &p->bydst; else break; } if (newpos) - hlist_add_behind(&policy->bydst, newpos); + hlist_add_behind_rcu(&policy->bydst, newpos); else - hlist_add_head(&policy->bydst, &n->hhead); + hlist_add_head_rcu(&policy->bydst, &n->hhead); /* paranoia checks follow. * Check that the reinserted policy matches at least @@ -893,12 +886,13 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net, struct rb_root *new, u16 family) { - struct rb_node **p, *parent = NULL; struct xfrm_pol_inexact_node *node; + struct rb_node **p, *parent; /* we should not have another subtree here */ WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root)); - +restart: + parent = NULL; p = &new->rb_node; while (*p) { u8 prefixlen; @@ -918,12 +912,11 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net, } else { struct xfrm_policy *tmp; - hlist_for_each_entry(tmp, &node->hhead, bydst) - tmp->bydst_reinsert = true; - hlist_for_each_entry(tmp, &n->hhead, bydst) + hlist_for_each_entry(tmp, &n->hhead, bydst) { tmp->bydst_reinsert = true; + hlist_del_rcu(&tmp->bydst); + } - INIT_HLIST_HEAD(&node->hhead); xfrm_policy_inexact_list_reinsert(net, node, family); if (node->prefixlen == n->prefixlen) { @@ -935,8 +928,7 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net, kfree_rcu(n, rcu); n = node; n->prefixlen = prefixlen; - *p = new->rb_node; - parent = NULL; + goto restart; } } @@ -965,12 +957,11 @@ static void xfrm_policy_inexact_node_merge(struct net *net, family); } - hlist_for_each_entry(tmp, &v->hhead, bydst) - tmp->bydst_reinsert = true; - hlist_for_each_entry(tmp, &n->hhead, bydst) + hlist_for_each_entry(tmp, &v->hhead, bydst) { tmp->bydst_reinsert = true; + hlist_del_rcu(&tmp->bydst); + } - INIT_HLIST_HEAD(&n->hhead); xfrm_policy_inexact_list_reinsert(net, n, family); } @@ -1235,6 +1226,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); + write_seqcount_begin(&xfrm_policy_hash_generation); /* make sure that we can insert the indirect policies again before * we start with destructive action. @@ -1278,10 +1270,14 @@ static void xfrm_hash_rebuild(struct work_struct *work) } /* reset the bydst and inexact table in all directions */ - xfrm_hash_reset_inexact_table(net); - for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); + struct hlist_node *n; + + hlist_for_each_entry_safe(policy, n, + &net->xfrm.policy_inexact[dir], + bydst_inexact_list) + hlist_del_init(&policy->bydst_inexact_list); + hmask = net->xfrm.policy_bydst[dir].hmask; odst = net->xfrm.policy_bydst[dir].table; for (i = hmask; i >= 0; i--) @@ -1313,6 +1309,9 @@ static void xfrm_hash_rebuild(struct work_struct *work) newpos = NULL; chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); + + hlist_del_rcu(&policy->bydst); + if (!chain) { void *p = xfrm_policy_inexact_insert(policy, dir, 0); @@ -1334,6 +1333,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) out_unlock: __xfrm_policy_inexact_flush(net); + write_seqcount_end(&xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); @@ -2600,7 +2600,10 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_copy_metrics(dst1, dst); if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - __u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]); + __u32 mark = 0; + + if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m) + mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]); family = xfrm[i]->props.family; dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 277c1c46fe94..c6d26afcf89d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1488,10 +1488,15 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) if (!ut[i].family) ut[i].family = family; - if ((ut[i].mode == XFRM_MODE_TRANSPORT) && - (ut[i].family != prev_family)) - return -EINVAL; - + switch (ut[i].mode) { + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + break; + default: + if (ut[i].family != prev_family) + return -EINVAL; + break; + } if (ut[i].mode >= XFRM_MODE_MAX) return -EINVAL; diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 66ae15f27c70..db1a91dfa702 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -279,6 +279,7 @@ $(obj)/%.o: $(src)/%.c -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ + -I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h new file mode 100644 index 000000000000..5cd7c1d1a5d5 --- /dev/null +++ b/samples/bpf/asm_goto_workaround.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Facebook */ +#ifndef __ASM_GOTO_WORKAROUND_H +#define __ASM_GOTO_WORKAROUND_H + +/* this will bring in asm_volatile_goto macro definition + * if enabled by compiler and config options. + */ +#include + +#ifdef asm_volatile_goto +#undef asm_volatile_goto +#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") +#endif + +#endif diff --git a/samples/bpf/bpf_insn.h b/samples/bpf/bpf_insn.h index 20dc5cefec84..544237980582 100644 --- a/samples/bpf/bpf_insn.h +++ b/samples/bpf/bpf_insn.h @@ -164,6 +164,16 @@ struct bpf_insn; .off = OFF, \ .imm = 0 }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ @@ -174,6 +184,16 @@ struct bpf_insn; .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Raw code statement block */ #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 525bff667a52..30816037036e 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -23,10 +23,6 @@ depfile = $(subst $(comma),_,$(dot-target).d) # filename of target with directory and extension stripped basetarget = $(basename $(notdir $@)) -### -# filename of first prerequisite with directory and extension stripped -baseprereq = $(basename $(notdir $<)) - ### # Escape single quote for use in echo statements escsq = $(subst $(squote),'\$(squote)',$1) diff --git a/scripts/gcc-plugins/arm_ssp_per_task_plugin.c b/scripts/gcc-plugins/arm_ssp_per_task_plugin.c index de70b8470971..89c47f57d1ce 100644 --- a/scripts/gcc-plugins/arm_ssp_per_task_plugin.c +++ b/scripts/gcc-plugins/arm_ssp_per_task_plugin.c @@ -13,7 +13,7 @@ static unsigned int arm_pertask_ssp_rtl_execute(void) for (insn = get_insns(); insn; insn = NEXT_INSN(insn)) { const char *sym; rtx body; - rtx masked_sp; + rtx mask, masked_sp; /* * Find a SET insn involving a SYMBOL_REF to __stack_chk_guard @@ -33,12 +33,13 @@ static unsigned int arm_pertask_ssp_rtl_execute(void) * produces the address of the copy of the stack canary value * stored in struct thread_info */ + mask = GEN_INT(sext_hwi(sp_mask, GET_MODE_PRECISION(Pmode))); masked_sp = gen_reg_rtx(Pmode); emit_insn_before(gen_rtx_SET(masked_sp, gen_rtx_AND(Pmode, stack_pointer_rtx, - GEN_INT(sp_mask))), + mask)), insn); SET_SRC(body) = gen_rtx_PLUS(Pmode, masked_sp, @@ -52,6 +53,19 @@ static unsigned int arm_pertask_ssp_rtl_execute(void) #define NO_GATE #include "gcc-generate-rtl-pass.h" +#if BUILDING_GCC_VERSION >= 9000 +static bool no(void) +{ + return false; +} + +static void arm_pertask_ssp_start_unit(void *gcc_data, void *user_data) +{ + targetm.have_stack_protect_combined_set = no; + targetm.have_stack_protect_combined_test = no; +} +#endif + __visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version) { @@ -99,5 +113,10 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &arm_pertask_ssp_rtl_pass_info); +#if BUILDING_GCC_VERSION >= 9000 + register_callback(plugin_info->base_name, PLUGIN_START_UNIT, + arm_pertask_ssp_start_unit, NULL); +#endif + return 0; } diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index c05ab001b54c..181973509a05 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -206,4 +206,4 @@ filechk_conf_cfg = $(CONFIG_SHELL) $< $(obj)/%conf-cfg: $(src)/%conf-cfg.sh FORCE $(call filechk,conf_cfg) -clean-files += conf-cfg +clean-files += *conf-cfg diff --git a/security/security.c b/security/security.c index f1b8d2587639..55bc49027ba9 100644 --- a/security/security.c +++ b/security/security.c @@ -1027,6 +1027,13 @@ int security_cred_alloc_blank(struct cred *cred, gfp_t gfp) void security_cred_free(struct cred *cred) { + /* + * There is a failure case in prepare_creds() that + * may result in a call here with ->security being NULL. + */ + if (unlikely(cred->security == NULL)) + return; + call_void_hook(cred_free, cred); } diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index a50d625e7946..c1c31e33657a 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -732,7 +732,8 @@ static int sens_destroy(void *key, void *datum, void *p) kfree(key); if (datum) { levdatum = datum; - ebitmap_destroy(&levdatum->level->cat); + if (levdatum->level) + ebitmap_destroy(&levdatum->level->cat); kfree(levdatum->level); } kfree(datum); diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index ffda91a4a1aa..02514fe558b4 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -368,7 +368,9 @@ static int yama_ptrace_access_check(struct task_struct *child, break; case YAMA_SCOPE_RELATIONAL: rcu_read_lock(); - if (!task_is_descendant(current, child) && + if (!pid_alive(child)) + rc = -EPERM; + if (!rc && !task_is_descendant(current, child) && !ptracer_exception_found(current, child) && !ns_capable(__task_cred(child)->user_ns, CAP_SYS_PTRACE)) rc = -EPERM; diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index a5b09e75e787..f7d2b373da0a 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -541,7 +541,8 @@ static int snd_compress_check_input(struct snd_compr_params *params) { /* first let's check the buffer parameter's */ if (params->buffer.fragment_size == 0 || - params->buffer.fragments > INT_MAX / params->buffer.fragment_size) + params->buffer.fragments > INT_MAX / params->buffer.fragment_size || + params->buffer.fragments == 0) return -EINVAL; /* now codec parameters */ diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 51cc6589443f..152f54137082 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -931,6 +931,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO), + SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8455, "HP Z2 G4", CXT_FIXUP_HP_MIC_NO_PRESENCE), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0b3e7a18ca78..b4f472157ebd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6926,7 +6926,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC293_FIXUP_LENOVO_SPK_NOISE, .name = "lenovo-spk-noise"}, {.id = ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, .name = "lenovo-hotkey"}, {.id = ALC255_FIXUP_DELL_SPK_NOISE, .name = "dell-spk-noise"}, - {.id = ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "alc255-dell1"}, + {.id = ALC225_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "alc225-dell1"}, {.id = ALC295_FIXUP_DISABLE_DAC3, .name = "alc295-disable-dac3"}, {.id = ALC280_FIXUP_HP_HEADSET_MIC, .name = "alc280-hp-headset"}, {.id = ALC221_FIXUP_HP_FRONT_MIC, .name = "alc221-hp-mic"}, diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c index 022a8912c8a2..3d58338fa3cf 100644 --- a/sound/soc/amd/raven/acp3x-pcm-dma.c +++ b/sound/soc/amd/raven/acp3x-pcm-dma.c @@ -611,14 +611,16 @@ static int acp3x_audio_probe(struct platform_device *pdev) } irqflags = *((unsigned int *)(pdev->dev.platform_data)); - adata = devm_kzalloc(&pdev->dev, sizeof(struct i2s_dev_data), - GFP_KERNEL); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(&pdev->dev, "IORESOURCE_IRQ FAILED\n"); return -ENODEV; } + adata = devm_kzalloc(&pdev->dev, sizeof(*adata), GFP_KERNEL); + if (!adata) + return -ENOMEM; + adata->acp3x_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index 3ab2949c1dfa..b19d7a3e7a2c 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -1890,51 +1890,31 @@ static void hdmi_codec_remove(struct snd_soc_component *component) pm_runtime_disable(&hdev->dev); } -#ifdef CONFIG_PM -static int hdmi_codec_prepare(struct device *dev) -{ - struct hdac_device *hdev = dev_to_hdac_dev(dev); - - pm_runtime_get_sync(&hdev->dev); - - /* - * Power down afg. - * codec_read is preferred over codec_write to set the power state. - * This way verb is send to set the power state and response - * is received. So setting power state is ensured without using loop - * to read the state. - */ - snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE, - AC_PWRST_D3); - - return 0; -} - -static void hdmi_codec_complete(struct device *dev) +#ifdef CONFIG_PM_SLEEP +static int hdmi_codec_resume(struct device *dev) { struct hdac_device *hdev = dev_to_hdac_dev(dev); struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev); + int ret; - /* Power up afg */ - snd_hdac_codec_read(hdev, hdev->afg, 0, AC_VERB_SET_POWER_STATE, - AC_PWRST_D0); - - hdac_hdmi_skl_enable_all_pins(hdev); - hdac_hdmi_skl_enable_dp12(hdev); - + ret = pm_runtime_force_resume(dev); + if (ret < 0) + return ret; /* * As the ELD notify callback request is not entertained while the * device is in suspend state. Need to manually check detection of * all pins here. pin capablity change is not support, so use the * already set pin caps. + * + * NOTE: this is safe to call even if the codec doesn't actually resume. + * The pin check involves only with DRM audio component hooks, so it + * works even if the HD-audio side is still dreaming peacefully. */ hdac_hdmi_present_sense_all_pins(hdev, hdmi, false); - - pm_runtime_put_sync(&hdev->dev); + return 0; } #else -#define hdmi_codec_prepare NULL -#define hdmi_codec_complete NULL +#define hdmi_codec_resume NULL #endif static const struct snd_soc_component_driver hdmi_hda_codec = { @@ -2135,75 +2115,6 @@ static int hdac_hdmi_dev_remove(struct hdac_device *hdev) } #ifdef CONFIG_PM -/* - * Power management sequences - * ========================== - * - * The following explains the PM handling of HDAC HDMI with its parent - * device SKL and display power usage - * - * Probe - * ----- - * In SKL probe, - * 1. skl_probe_work() powers up the display (refcount++ -> 1) - * 2. enumerates the codecs on the link - * 3. powers down the display (refcount-- -> 0) - * - * In HDAC HDMI probe, - * 1. hdac_hdmi_dev_probe() powers up the display (refcount++ -> 1) - * 2. probe the codec - * 3. put the HDAC HDMI device to runtime suspend - * 4. hdac_hdmi_runtime_suspend() powers down the display (refcount-- -> 0) - * - * Once children are runtime suspended, SKL device also goes to runtime - * suspend - * - * HDMI Playback - * ------------- - * Open HDMI device, - * 1. skl_runtime_resume() invoked - * 2. hdac_hdmi_runtime_resume() powers up the display (refcount++ -> 1) - * - * Close HDMI device, - * 1. hdac_hdmi_runtime_suspend() powers down the display (refcount-- -> 0) - * 2. skl_runtime_suspend() invoked - * - * S0/S3 Cycle with playback in progress - * ------------------------------------- - * When the device is opened for playback, the device is runtime active - * already and the display refcount is 1 as explained above. - * - * Entering to S3, - * 1. hdmi_codec_prepare() invoke the runtime resume of codec which just - * increments the PM runtime usage count of the codec since the device - * is in use already - * 2. skl_suspend() powers down the display (refcount-- -> 0) - * - * Wakeup from S3, - * 1. skl_resume() powers up the display (refcount++ -> 1) - * 2. hdmi_codec_complete() invokes the runtime suspend of codec which just - * decrements the PM runtime usage count of the codec since the device - * is in use already - * - * Once playback is stopped, the display refcount is set to 0 as explained - * above in the HDMI playback sequence. The PM handlings are designed in - * such way that to balance the refcount of display power when the codec - * device put to S3 while playback is going on. - * - * S0/S3 Cycle without playback in progress - * ---------------------------------------- - * Entering to S3, - * 1. hdmi_codec_prepare() invoke the runtime resume of codec - * 2. skl_runtime_resume() invoked - * 3. hdac_hdmi_runtime_resume() powers up the display (refcount++ -> 1) - * 4. skl_suspend() powers down the display (refcount-- -> 0) - * - * Wakeup from S3, - * 1. skl_resume() powers up the display (refcount++ -> 1) - * 2. hdmi_codec_complete() invokes the runtime suspend of codec - * 3. hdac_hdmi_runtime_suspend() powers down the display (refcount-- -> 0) - * 4. skl_runtime_suspend() invoked - */ static int hdac_hdmi_runtime_suspend(struct device *dev) { struct hdac_device *hdev = dev_to_hdac_dev(dev); @@ -2277,8 +2188,7 @@ static int hdac_hdmi_runtime_resume(struct device *dev) static const struct dev_pm_ops hdac_hdmi_pm = { SET_RUNTIME_PM_OPS(hdac_hdmi_runtime_suspend, hdac_hdmi_runtime_resume, NULL) - .prepare = hdmi_codec_prepare, - .complete = hdmi_codec_complete, + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, hdmi_codec_resume) }; static const struct hda_device_id hdmi_list[] = { diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c index 6cb1653be804..4cc24a5d5c31 100644 --- a/sound/soc/codecs/pcm512x.c +++ b/sound/soc/codecs/pcm512x.c @@ -1400,24 +1400,20 @@ static int pcm512x_digital_mute(struct snd_soc_dai *dai, int mute) if (ret != 0) { dev_err(component->dev, "Failed to set digital mute: %d\n", ret); - mutex_unlock(&pcm512x->mutex); - return ret; + goto unlock; } regmap_read_poll_timeout(pcm512x->regmap, PCM512x_ANALOG_MUTE_DET, mute_det, (mute_det & 0x3) == 0, 200, 10000); - - mutex_unlock(&pcm512x->mutex); } else { pcm512x->mute &= ~0x1; ret = pcm512x_update_mute(pcm512x); if (ret != 0) { dev_err(component->dev, "Failed to update digital mute: %d\n", ret); - mutex_unlock(&pcm512x->mutex); - return ret; + goto unlock; } regmap_read_poll_timeout(pcm512x->regmap, @@ -1428,9 +1424,10 @@ static int pcm512x_digital_mute(struct snd_soc_dai *dai, int mute) 200, 10000); } +unlock: mutex_unlock(&pcm512x->mutex); - return 0; + return ret; } static const struct snd_soc_dai_ops pcm512x_dai_ops = { diff --git a/sound/soc/codecs/rt274.c b/sound/soc/codecs/rt274.c index 0ef966d56bac..e2855ab9a2c6 100644 --- a/sound/soc/codecs/rt274.c +++ b/sound/soc/codecs/rt274.c @@ -1128,8 +1128,11 @@ static int rt274_i2c_probe(struct i2c_client *i2c, return ret; } - regmap_read(rt274->regmap, + ret = regmap_read(rt274->regmap, RT274_GET_PARAM(AC_NODE_ROOT, AC_PAR_VENDOR_ID), &val); + if (ret) + return ret; + if (val != RT274_VENDOR_ID) { dev_err(&i2c->dev, "Device with ID register %#x is not rt274\n", val); diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c index 4d46f4567c3a..bec2eefa8b0f 100644 --- a/sound/soc/codecs/rt5514-spi.c +++ b/sound/soc/codecs/rt5514-spi.c @@ -280,6 +280,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_component *component) rt5514_dsp = devm_kzalloc(component->dev, sizeof(*rt5514_dsp), GFP_KERNEL); + if (!rt5514_dsp) + return -ENOMEM; rt5514_dsp->dev = &rt5514_spi->dev; mutex_init(&rt5514_dsp->dma_lock); diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 34cfaf8f6f34..89c43b26c379 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -2512,6 +2512,7 @@ static void rt5682_calibrate(struct rt5682_priv *rt5682) regmap_write(rt5682->regmap, RT5682_PWR_DIG_1, 0x0000); regmap_write(rt5682->regmap, RT5682_CHOP_DAC, 0x2000); regmap_write(rt5682->regmap, RT5682_CALIB_ADC_CTRL, 0x2005); + regmap_write(rt5682->regmap, RT5682_STO1_ADC_MIXER, 0xc0c4); mutex_unlock(&rt5682->calibrate_mutex); diff --git a/sound/soc/codecs/rt5682.h b/sound/soc/codecs/rt5682.h index d82a8301fd74..96944cff0ed7 100644 --- a/sound/soc/codecs/rt5682.h +++ b/sound/soc/codecs/rt5682.h @@ -849,18 +849,18 @@ #define RT5682_SCLK_SRC_PLL2 (0x2 << 13) #define RT5682_SCLK_SRC_SDW (0x3 << 13) #define RT5682_SCLK_SRC_RCCLK (0x4 << 13) -#define RT5682_PLL1_SRC_MASK (0x3 << 10) -#define RT5682_PLL1_SRC_SFT 10 -#define RT5682_PLL1_SRC_MCLK (0x0 << 10) -#define RT5682_PLL1_SRC_BCLK1 (0x1 << 10) -#define RT5682_PLL1_SRC_SDW (0x2 << 10) -#define RT5682_PLL1_SRC_RC (0x3 << 10) -#define RT5682_PLL2_SRC_MASK (0x3 << 8) -#define RT5682_PLL2_SRC_SFT 8 -#define RT5682_PLL2_SRC_MCLK (0x0 << 8) -#define RT5682_PLL2_SRC_BCLK1 (0x1 << 8) -#define RT5682_PLL2_SRC_SDW (0x2 << 8) -#define RT5682_PLL2_SRC_RC (0x3 << 8) +#define RT5682_PLL2_SRC_MASK (0x3 << 10) +#define RT5682_PLL2_SRC_SFT 10 +#define RT5682_PLL2_SRC_MCLK (0x0 << 10) +#define RT5682_PLL2_SRC_BCLK1 (0x1 << 10) +#define RT5682_PLL2_SRC_SDW (0x2 << 10) +#define RT5682_PLL2_SRC_RC (0x3 << 10) +#define RT5682_PLL1_SRC_MASK (0x3 << 8) +#define RT5682_PLL1_SRC_SFT 8 +#define RT5682_PLL1_SRC_MCLK (0x0 << 8) +#define RT5682_PLL1_SRC_BCLK1 (0x1 << 8) +#define RT5682_PLL1_SRC_SDW (0x2 << 8) +#define RT5682_PLL1_SRC_RC (0x3 << 8) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index e2b5a11b16d1..f03195d2ab2e 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -822,6 +822,10 @@ static int aic32x4_set_bias_level(struct snd_soc_component *component, case SND_SOC_BIAS_PREPARE: break; case SND_SOC_BIAS_STANDBY: + /* Initial cold start */ + if (snd_soc_component_get_bias_level(component) == SND_SOC_BIAS_OFF) + break; + /* Switch off BCLK_N Divider */ snd_soc_component_update_bits(component, AIC32X4_BCLKN, AIC32X4_BCLKEN, 0); diff --git a/sound/soc/fsl/imx-audmux.c b/sound/soc/fsl/imx-audmux.c index 392d5eef356d..99e07b01a2ce 100644 --- a/sound/soc/fsl/imx-audmux.c +++ b/sound/soc/fsl/imx-audmux.c @@ -86,49 +86,49 @@ static ssize_t audmux_read_file(struct file *file, char __user *user_buf, if (!buf) return -ENOMEM; - ret = snprintf(buf, PAGE_SIZE, "PDCR: %08x\nPTCR: %08x\n", + ret = scnprintf(buf, PAGE_SIZE, "PDCR: %08x\nPTCR: %08x\n", pdcr, ptcr); if (ptcr & IMX_AUDMUX_V2_PTCR_TFSDIR) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "TxFS output from %s, ", audmux_port_string((ptcr >> 27) & 0x7)); else - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "TxFS input, "); if (ptcr & IMX_AUDMUX_V2_PTCR_TCLKDIR) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "TxClk output from %s", audmux_port_string((ptcr >> 22) & 0x7)); else - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "TxClk input"); - ret += snprintf(buf + ret, PAGE_SIZE - ret, "\n"); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); if (ptcr & IMX_AUDMUX_V2_PTCR_SYN) { - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Port is symmetric"); } else { if (ptcr & IMX_AUDMUX_V2_PTCR_RFSDIR) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "RxFS output from %s, ", audmux_port_string((ptcr >> 17) & 0x7)); else - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "RxFS input, "); if (ptcr & IMX_AUDMUX_V2_PTCR_RCLKDIR) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "RxClk output from %s", audmux_port_string((ptcr >> 12) & 0x7)); else - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "RxClk input"); } - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\nData received from %s\n", audmux_port_string((pdcr >> 13) & 0x7)); diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig index 99a62ba409df..bd9fd2035c55 100644 --- a/sound/soc/intel/Kconfig +++ b/sound/soc/intel/Kconfig @@ -91,7 +91,7 @@ config SND_SST_ATOM_HIFI2_PLATFORM_PCI config SND_SST_ATOM_HIFI2_PLATFORM_ACPI tristate "ACPI HiFi2 (Baytrail, Cherrytrail) Platforms" default ACPI - depends on X86 && ACPI + depends on X86 && ACPI && PCI select SND_SST_IPC_ACPI select SND_SST_ATOM_HIFI2_PLATFORM select SND_SOC_ACPI_INTEL_MATCH diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index afc559866095..91a2436ce952 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -399,7 +399,13 @@ static int sst_media_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) { - snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(params)); + int ret; + + ret = + snd_pcm_lib_malloc_pages(substream, + params_buffer_bytes(params)); + if (ret) + return ret; memset(substream->runtime->dma_area, 0, params_buffer_bytes(params)); return 0; } diff --git a/sound/soc/intel/boards/broadwell.c b/sound/soc/intel/boards/broadwell.c index 68e6543e6cb0..99f2a0156ae8 100644 --- a/sound/soc/intel/boards/broadwell.c +++ b/sound/soc/intel/boards/broadwell.c @@ -192,7 +192,7 @@ static struct snd_soc_dai_link broadwell_rt286_dais[] = { .stream_name = "Loopback", .cpu_dai_name = "Loopback Pin", .platform_name = "haswell-pcm-audio", - .dynamic = 0, + .dynamic = 1, .codec_name = "snd-soc-dummy", .codec_dai_name = "snd-soc-dummy-dai", .trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST}, diff --git a/sound/soc/intel/boards/glk_rt5682_max98357a.c b/sound/soc/intel/boards/glk_rt5682_max98357a.c index c74c4f17316f..8f83b182c4f9 100644 --- a/sound/soc/intel/boards/glk_rt5682_max98357a.c +++ b/sound/soc/intel/boards/glk_rt5682_max98357a.c @@ -55,39 +55,6 @@ enum { GLK_DPCM_AUDIO_HDMI3_PB, }; -static int platform_clock_control(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *k, int event) -{ - struct snd_soc_dapm_context *dapm = w->dapm; - struct snd_soc_card *card = dapm->card; - struct snd_soc_dai *codec_dai; - int ret = 0; - - codec_dai = snd_soc_card_get_codec_dai(card, GLK_REALTEK_CODEC_DAI); - if (!codec_dai) { - dev_err(card->dev, "Codec dai not found; Unable to set/unset codec pll\n"); - return -EIO; - } - - if (SND_SOC_DAPM_EVENT_OFF(event)) { - ret = snd_soc_dai_set_sysclk(codec_dai, 0, 0, 0); - if (ret) - dev_err(card->dev, "failed to stop sysclk: %d\n", ret); - } else if (SND_SOC_DAPM_EVENT_ON(event)) { - ret = snd_soc_dai_set_pll(codec_dai, 0, RT5682_PLL1_S_MCLK, - GLK_PLAT_CLK_FREQ, RT5682_PLL_FREQ); - if (ret < 0) { - dev_err(card->dev, "can't set codec pll: %d\n", ret); - return ret; - } - } - - if (ret) - dev_err(card->dev, "failed to start internal clk: %d\n", ret); - - return ret; -} - static const struct snd_kcontrol_new geminilake_controls[] = { SOC_DAPM_PIN_SWITCH("Headphone Jack"), SOC_DAPM_PIN_SWITCH("Headset Mic"), @@ -102,14 +69,10 @@ static const struct snd_soc_dapm_widget geminilake_widgets[] = { SND_SOC_DAPM_SPK("HDMI1", NULL), SND_SOC_DAPM_SPK("HDMI2", NULL), SND_SOC_DAPM_SPK("HDMI3", NULL), - SND_SOC_DAPM_SUPPLY("Platform Clock", SND_SOC_NOPM, 0, 0, - platform_clock_control, SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD), }; static const struct snd_soc_dapm_route geminilake_map[] = { /* HP jack connectors - unknown if we have jack detection */ - { "Headphone Jack", NULL, "Platform Clock" }, { "Headphone Jack", NULL, "HPOL" }, { "Headphone Jack", NULL, "HPOR" }, @@ -117,7 +80,6 @@ static const struct snd_soc_dapm_route geminilake_map[] = { { "Spk", NULL, "Speaker" }, /* other jacks */ - { "Headset Mic", NULL, "Platform Clock" }, { "IN1P", NULL, "Headset Mic" }, /* digital mics */ @@ -177,6 +139,13 @@ static int geminilake_rt5682_codec_init(struct snd_soc_pcm_runtime *rtd) struct snd_soc_jack *jack; int ret; + ret = snd_soc_dai_set_pll(codec_dai, 0, RT5682_PLL1_S_MCLK, + GLK_PLAT_CLK_FREQ, RT5682_PLL_FREQ); + if (ret < 0) { + dev_err(rtd->dev, "can't set codec pll: %d\n", ret); + return ret; + } + /* Configure sysclk for codec */ ret = snd_soc_dai_set_sysclk(codec_dai, RT5682_SCLK_S_PLL1, RT5682_PLL_FREQ, SND_SOC_CLOCK_IN); diff --git a/sound/soc/intel/boards/haswell.c b/sound/soc/intel/boards/haswell.c index eab1f439dd3f..a4022983a7ce 100644 --- a/sound/soc/intel/boards/haswell.c +++ b/sound/soc/intel/boards/haswell.c @@ -146,7 +146,7 @@ static struct snd_soc_dai_link haswell_rt5640_dais[] = { .stream_name = "Loopback", .cpu_dai_name = "Loopback Pin", .platform_name = "haswell-pcm-audio", - .dynamic = 0, + .dynamic = 1, .codec_name = "snd-soc-dummy", .codec_dai_name = "snd-soc-dummy-dai", .trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST}, diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c index 60c94836bf5b..4ed5b7e17d44 100644 --- a/sound/soc/intel/skylake/skl.c +++ b/sound/soc/intel/skylake/skl.c @@ -336,9 +336,6 @@ static int skl_suspend(struct device *dev) skl->skl_sst->fw_loaded = false; } - if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) - snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, false); - return 0; } @@ -350,10 +347,6 @@ static int skl_resume(struct device *dev) struct hdac_ext_link *hlink = NULL; int ret; - /* Turned OFF in HDMI codec driver after codec reconfiguration */ - if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) - snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, true); - /* * resume only when we are not in suspend active, otherwise need to * restore the device @@ -446,8 +439,10 @@ static int skl_free(struct hdac_bus *bus) snd_hdac_ext_bus_exit(bus); cancel_work_sync(&skl->probe_work); - if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) + if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) { + snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, false); snd_hdac_i915_exit(bus); + } return 0; } @@ -814,7 +809,7 @@ static void skl_probe_work(struct work_struct *work) err = skl_platform_register(bus->dev); if (err < 0) { dev_err(bus->dev, "platform register failed: %d\n", err); - return; + goto out_err; } err = skl_machine_device_register(skl); diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c index 5b986b74dd36..548eb4fa2da6 100644 --- a/sound/soc/qcom/qdsp6/q6asm-dai.c +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c @@ -570,10 +570,10 @@ static int q6asm_dai_compr_open(struct snd_compr_stream *stream) prtd->audio_client = q6asm_audio_client_alloc(dev, (q6asm_cb)compress_event_handler, prtd, stream_id, LEGACY_PCM_MODE); - if (!prtd->audio_client) { + if (IS_ERR(prtd->audio_client)) { dev_err(dev, "Could not allocate memory\n"); - kfree(prtd); - return -ENOMEM; + ret = PTR_ERR(prtd->audio_client); + goto free_prtd; } size = COMPR_PLAYBACK_MAX_FRAGMENT_SIZE * @@ -582,7 +582,7 @@ static int q6asm_dai_compr_open(struct snd_compr_stream *stream) &prtd->dma_buffer); if (ret) { dev_err(dev, "Cannot allocate buffer(s)\n"); - return ret; + goto free_client; } if (pdata->sid < 0) @@ -595,6 +595,13 @@ static int q6asm_dai_compr_open(struct snd_compr_stream *stream) runtime->private_data = prtd; return 0; + +free_client: + q6asm_audio_client_free(prtd->audio_client); +free_prtd: + kfree(prtd); + + return ret; } static int q6asm_dai_compr_free(struct snd_compr_stream *stream) @@ -874,7 +881,7 @@ static int of_q6asm_parse_dai_data(struct device *dev, for_each_child_of_node(dev->of_node, node) { ret = of_property_read_u32(node, "reg", &id); - if (ret || id > MAX_SESSIONS || id < 0) { + if (ret || id >= MAX_SESSIONS || id < 0) { dev_err(dev, "valid dai id not found:%d\n", ret); continue; } diff --git a/sound/soc/qcom/sdm845.c b/sound/soc/qcom/sdm845.c index 1db8ef668223..6f66a58e23ca 100644 --- a/sound/soc/qcom/sdm845.c +++ b/sound/soc/qcom/sdm845.c @@ -158,17 +158,24 @@ static int sdm845_snd_hw_params(struct snd_pcm_substream *substream, return ret; } +static void sdm845_jack_free(struct snd_jack *jack) +{ + struct snd_soc_component *component = jack->private_data; + + snd_soc_component_set_jack(component, NULL, NULL); +} + static int sdm845_dai_init(struct snd_soc_pcm_runtime *rtd) { struct snd_soc_component *component; - struct snd_soc_dai_link *dai_link = rtd->dai_link; struct snd_soc_card *card = rtd->card; + struct snd_soc_dai *codec_dai = rtd->codec_dai; + struct snd_soc_dai *cpu_dai = rtd->cpu_dai; struct sdm845_snd_data *pdata = snd_soc_card_get_drvdata(card); - int i, rval; + struct snd_jack *jack; + int rval; if (!pdata->jack_setup) { - struct snd_jack *jack; - rval = snd_soc_card_jack_new(card, "Headset Jack", SND_JACK_HEADSET | SND_JACK_HEADPHONE | @@ -190,16 +197,22 @@ static int sdm845_dai_init(struct snd_soc_pcm_runtime *rtd) pdata->jack_setup = true; } - for (i = 0 ; i < dai_link->num_codecs; i++) { - struct snd_soc_dai *dai = rtd->codec_dais[i]; + switch (cpu_dai->id) { + case PRIMARY_MI2S_RX: + jack = pdata->jack.jack; + component = codec_dai->component; - component = dai->component; - rval = snd_soc_component_set_jack( - component, &pdata->jack, NULL); + jack->private_data = component; + jack->private_free = sdm845_jack_free; + rval = snd_soc_component_set_jack(component, + &pdata->jack, NULL); if (rval != 0 && rval != -ENOTSUPP) { dev_warn(card->dev, "Failed to set jack: %d\n", rval); return rval; } + break; + default: + break; } return 0; diff --git a/sound/soc/sh/dma-sh7760.c b/sound/soc/sh/dma-sh7760.c index 922fb6aa3ed1..5aee11c94f2a 100644 --- a/sound/soc/sh/dma-sh7760.c +++ b/sound/soc/sh/dma-sh7760.c @@ -202,7 +202,7 @@ static int camelot_prepare(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *rtd = substream->private_data; struct camelot_pcm *cam = &cam_pcm_data[rtd->cpu_dai->id]; - pr_debug("PCM data: addr 0x%08ulx len %d\n", + pr_debug("PCM data: addr 0x%08lx len %d\n", (u32)runtime->dma_addr, runtime->dma_bytes); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 0462b3ec977a..aae450ba4f08 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -742,7 +742,7 @@ static struct snd_soc_component *soc_find_component( if (of_node) { if (component->dev->of_node == of_node) return component; - } else if (strcmp(component->name, name) == 0) { + } else if (name && strcmp(component->name, name) == 0) { return component; } } @@ -1034,17 +1034,18 @@ static int snd_soc_init_platform(struct snd_soc_card *card, * this function should be removed in the future */ /* convert Legacy platform link */ - if (!platform) { + if (!platform || dai_link->legacy_platform) { platform = devm_kzalloc(card->dev, sizeof(struct snd_soc_dai_link_component), GFP_KERNEL); if (!platform) return -ENOMEM; - dai_link->platform = platform; - platform->name = dai_link->platform_name; - platform->of_node = dai_link->platform_of_node; - platform->dai_name = NULL; + dai_link->platform = platform; + dai_link->legacy_platform = 1; + platform->name = dai_link->platform_name; + platform->of_node = dai_link->platform_of_node; + platform->dai_name = NULL; } /* if there's no platform we match on the empty platform */ @@ -1129,6 +1130,15 @@ static int soc_init_dai_link(struct snd_soc_card *card, link->name); return -EINVAL; } + + /* + * Defer card registartion if platform dai component is not added to + * component list. + */ + if ((link->platform->of_node || link->platform->name) && + !soc_find_component(link->platform->of_node, link->platform->name)) + return -EPROBE_DEFER; + /* * CPU device may be specified by either name or OF node, but * can be left unspecified, and will be matched based on DAI @@ -1140,6 +1150,15 @@ static int soc_init_dai_link(struct snd_soc_card *card, link->name); return -EINVAL; } + + /* + * Defer card registartion if cpu dai component is not added to + * component list. + */ + if ((link->cpu_of_node || link->cpu_name) && + !soc_find_component(link->cpu_of_node, link->cpu_name)) + return -EPROBE_DEFER; + /* * At least one of CPU DAI name or CPU device name/node must be * specified @@ -2739,15 +2758,18 @@ int snd_soc_register_card(struct snd_soc_card *card) if (!card->name || !card->dev) return -EINVAL; + mutex_lock(&client_mutex); for_each_card_prelinks(card, i, link) { ret = soc_init_dai_link(card, link); if (ret) { dev_err(card->dev, "ASoC: failed to init link %s\n", link->name); + mutex_unlock(&client_mutex); return ret; } } + mutex_unlock(&client_mutex); dev_set_drvdata(card->dev, card); diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index a5178845065b..2c4c13419539 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2019,19 +2019,19 @@ static ssize_t dapm_widget_power_read_file(struct file *file, out = is_connected_output_ep(w, NULL, NULL); } - ret = snprintf(buf, PAGE_SIZE, "%s: %s%s in %d out %d", + ret = scnprintf(buf, PAGE_SIZE, "%s: %s%s in %d out %d", w->name, w->power ? "On" : "Off", w->force ? " (forced)" : "", in, out); if (w->reg >= 0) - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, " - R%d(0x%x) mask 0x%x", w->reg, w->reg, w->mask << w->shift); - ret += snprintf(buf + ret, PAGE_SIZE - ret, "\n"); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); if (w->sname) - ret += snprintf(buf + ret, PAGE_SIZE - ret, " stream %s %s\n", + ret += scnprintf(buf + ret, PAGE_SIZE - ret, " stream %s %s\n", w->sname, w->active ? "active" : "inactive"); @@ -2044,7 +2044,7 @@ static ssize_t dapm_widget_power_read_file(struct file *file, if (!p->connect) continue; - ret += snprintf(buf + ret, PAGE_SIZE - ret, + ret += scnprintf(buf + ret, PAGE_SIZE - ret, " %s \"%s\" \"%s\"\n", (rdir == SND_SOC_DAPM_DIR_IN) ? "in" : "out", p->name ? p->name : "static", diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c index eeda6d5565bc..a10fcb5963c6 100644 --- a/sound/soc/ti/davinci-mcasp.c +++ b/sound/soc/ti/davinci-mcasp.c @@ -108,7 +108,7 @@ struct davinci_mcasp { /* Used for comstraint setting on the second stream */ u32 channels; -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM struct davinci_mcasp_context context; #endif @@ -1486,74 +1486,6 @@ static int davinci_mcasp_dai_probe(struct snd_soc_dai *dai) return 0; } -#ifdef CONFIG_PM_SLEEP -static int davinci_mcasp_suspend(struct snd_soc_dai *dai) -{ - struct davinci_mcasp *mcasp = snd_soc_dai_get_drvdata(dai); - struct davinci_mcasp_context *context = &mcasp->context; - u32 reg; - int i; - - context->pm_state = pm_runtime_active(mcasp->dev); - if (!context->pm_state) - pm_runtime_get_sync(mcasp->dev); - - for (i = 0; i < ARRAY_SIZE(context_regs); i++) - context->config_regs[i] = mcasp_get_reg(mcasp, context_regs[i]); - - if (mcasp->txnumevt) { - reg = mcasp->fifo_base + MCASP_WFIFOCTL_OFFSET; - context->afifo_regs[0] = mcasp_get_reg(mcasp, reg); - } - if (mcasp->rxnumevt) { - reg = mcasp->fifo_base + MCASP_RFIFOCTL_OFFSET; - context->afifo_regs[1] = mcasp_get_reg(mcasp, reg); - } - - for (i = 0; i < mcasp->num_serializer; i++) - context->xrsr_regs[i] = mcasp_get_reg(mcasp, - DAVINCI_MCASP_XRSRCTL_REG(i)); - - pm_runtime_put_sync(mcasp->dev); - - return 0; -} - -static int davinci_mcasp_resume(struct snd_soc_dai *dai) -{ - struct davinci_mcasp *mcasp = snd_soc_dai_get_drvdata(dai); - struct davinci_mcasp_context *context = &mcasp->context; - u32 reg; - int i; - - pm_runtime_get_sync(mcasp->dev); - - for (i = 0; i < ARRAY_SIZE(context_regs); i++) - mcasp_set_reg(mcasp, context_regs[i], context->config_regs[i]); - - if (mcasp->txnumevt) { - reg = mcasp->fifo_base + MCASP_WFIFOCTL_OFFSET; - mcasp_set_reg(mcasp, reg, context->afifo_regs[0]); - } - if (mcasp->rxnumevt) { - reg = mcasp->fifo_base + MCASP_RFIFOCTL_OFFSET; - mcasp_set_reg(mcasp, reg, context->afifo_regs[1]); - } - - for (i = 0; i < mcasp->num_serializer; i++) - mcasp_set_reg(mcasp, DAVINCI_MCASP_XRSRCTL_REG(i), - context->xrsr_regs[i]); - - if (!context->pm_state) - pm_runtime_put_sync(mcasp->dev); - - return 0; -} -#else -#define davinci_mcasp_suspend NULL -#define davinci_mcasp_resume NULL -#endif - #define DAVINCI_MCASP_RATES SNDRV_PCM_RATE_8000_192000 #define DAVINCI_MCASP_PCM_FMTS (SNDRV_PCM_FMTBIT_S8 | \ @@ -1571,8 +1503,6 @@ static struct snd_soc_dai_driver davinci_mcasp_dai[] = { { .name = "davinci-mcasp.0", .probe = davinci_mcasp_dai_probe, - .suspend = davinci_mcasp_suspend, - .resume = davinci_mcasp_resume, .playback = { .channels_min = 1, .channels_max = 32 * 16, @@ -1976,7 +1906,7 @@ static int davinci_mcasp_probe(struct platform_device *pdev) } mcasp->num_serializer = pdata->num_serializer; -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM mcasp->context.xrsr_regs = devm_kcalloc(&pdev->dev, mcasp->num_serializer, sizeof(u32), GFP_KERNEL); @@ -2196,11 +2126,73 @@ static int davinci_mcasp_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM +static int davinci_mcasp_runtime_suspend(struct device *dev) +{ + struct davinci_mcasp *mcasp = dev_get_drvdata(dev); + struct davinci_mcasp_context *context = &mcasp->context; + u32 reg; + int i; + + for (i = 0; i < ARRAY_SIZE(context_regs); i++) + context->config_regs[i] = mcasp_get_reg(mcasp, context_regs[i]); + + if (mcasp->txnumevt) { + reg = mcasp->fifo_base + MCASP_WFIFOCTL_OFFSET; + context->afifo_regs[0] = mcasp_get_reg(mcasp, reg); + } + if (mcasp->rxnumevt) { + reg = mcasp->fifo_base + MCASP_RFIFOCTL_OFFSET; + context->afifo_regs[1] = mcasp_get_reg(mcasp, reg); + } + + for (i = 0; i < mcasp->num_serializer; i++) + context->xrsr_regs[i] = mcasp_get_reg(mcasp, + DAVINCI_MCASP_XRSRCTL_REG(i)); + + return 0; +} + +static int davinci_mcasp_runtime_resume(struct device *dev) +{ + struct davinci_mcasp *mcasp = dev_get_drvdata(dev); + struct davinci_mcasp_context *context = &mcasp->context; + u32 reg; + int i; + + for (i = 0; i < ARRAY_SIZE(context_regs); i++) + mcasp_set_reg(mcasp, context_regs[i], context->config_regs[i]); + + if (mcasp->txnumevt) { + reg = mcasp->fifo_base + MCASP_WFIFOCTL_OFFSET; + mcasp_set_reg(mcasp, reg, context->afifo_regs[0]); + } + if (mcasp->rxnumevt) { + reg = mcasp->fifo_base + MCASP_RFIFOCTL_OFFSET; + mcasp_set_reg(mcasp, reg, context->afifo_regs[1]); + } + + for (i = 0; i < mcasp->num_serializer; i++) + mcasp_set_reg(mcasp, DAVINCI_MCASP_XRSRCTL_REG(i), + context->xrsr_regs[i]); + + return 0; +} + +#endif + +static const struct dev_pm_ops davinci_mcasp_pm_ops = { + SET_RUNTIME_PM_OPS(davinci_mcasp_runtime_suspend, + davinci_mcasp_runtime_resume, + NULL) +}; + static struct platform_driver davinci_mcasp_driver = { .probe = davinci_mcasp_probe, .remove = davinci_mcasp_remove, .driver = { .name = "davinci-mcasp", + .pm = &davinci_mcasp_pm_ops, .of_match_table = mcasp_dt_ids, }, }; diff --git a/sound/soc/xilinx/Kconfig b/sound/soc/xilinx/Kconfig index 25e287feb58c..723a583a8d57 100644 --- a/sound/soc/xilinx/Kconfig +++ b/sound/soc/xilinx/Kconfig @@ -1,5 +1,5 @@ config SND_SOC_XILINX_I2S - tristate "Audio support for the the Xilinx I2S" + tristate "Audio support for the Xilinx I2S" help Select this option to enable Xilinx I2S Audio. This enables I2S playback and capture using xilinx soft IP. In transmitter diff --git a/sound/soc/xilinx/xlnx_i2s.c b/sound/soc/xilinx/xlnx_i2s.c index d4ae9eff41ce..8b353166ad44 100644 --- a/sound/soc/xilinx/xlnx_i2s.c +++ b/sound/soc/xilinx/xlnx_i2s.c @@ -1,12 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 -/* - * Xilinx ASoC I2S audio support - * - * Copyright (C) 2018 Xilinx, Inc. - * - * Author: Praveen Vuppala - * Author: Maruthi Srinivas Bayyavarapu - */ +// +// Xilinx ASoC I2S audio support +// +// Copyright (C) 2018 Xilinx, Inc. +// +// Author: Praveen Vuppala +// Author: Maruthi Srinivas Bayyavarapu #include #include diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h index ff91192407d1..f599064dd8dc 100644 --- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h @@ -47,6 +47,7 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_DAR, PERF_REG_POWERPC_DSISR, PERF_REG_POWERPC_SIER, + PERF_REG_POWERPC_MMCRA, PERF_REG_POWERPC_MAX, }; #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index d07ccf8a23f7..d43fce568ef7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -142,5 +142,6 @@ SEE ALSO **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8), + **bpftool-feature**\ (8), **bpftool-net**\ (8), **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst new file mode 100644 index 000000000000..8d489a26e3c9 --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -0,0 +1,85 @@ +=============== +bpftool-feature +=============== +------------------------------------------------------------------------------- +tool for inspection of eBPF-related parameters for Linux kernel or net device +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **feature** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + + *COMMANDS* := { **probe** | **help** } + +MAP COMMANDS +============= + +| **bpftool** **feature probe** [*COMPONENT*] [**macros** [**prefix** *PREFIX*]] +| **bpftool** **feature help** +| +| *COMPONENT* := { **kernel** | **dev** *NAME* } + +DESCRIPTION +=========== + **bpftool feature probe** [**kernel**] [**macros** [**prefix** *PREFIX*]] + Probe the running kernel and dump a number of eBPF-related + parameters, such as availability of the **bpf()** system call, + JIT status, eBPF program types availability, eBPF helper + functions availability, and more. + + If the **macros** keyword (but not the **-j** option) is + passed, a subset of the output is dumped as a list of + **#define** macros that are ready to be included in a C + header file, for example. If, additionally, **prefix** is + used to define a *PREFIX*, the provided string will be used + as a prefix to the names of the macros: this can be used to + avoid conflicts on macro names when including the output of + this command as a header file. + + Keyword **kernel** can be omitted. If no probe target is + specified, probing the kernel is the default behaviour. + + Note that when probed, some eBPF helpers (e.g. + **bpf_trace_printk**\ () or **bpf_probe_write_user**\ ()) may + print warnings to kernel logs. + + **bpftool feature probe dev** *NAME* [**macros** [**prefix** *PREFIX*]] + Probe network device for supported eBPF features and dump + results to the console. + + The two keywords **macros** and **prefix** have the same + role as when probing the kernel. + + **bpftool feature help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + +SEE ALSO +======== + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-prog**\ (8), + **bpftool-map**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 64b001b4f777..5c984ffc9f01 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -25,12 +25,17 @@ MAP COMMANDS | **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \ | **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*] | **bpftool** **map dump** *MAP* -| **bpftool** **map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*] -| **bpftool** **map lookup** *MAP* **key** *DATA* +| **bpftool** **map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] +| **bpftool** **map lookup** *MAP* [**key** *DATA*] | **bpftool** **map getnext** *MAP* [**key** *DATA*] | **bpftool** **map delete** *MAP* **key** *DATA* | **bpftool** **map pin** *MAP* *FILE* | **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] +| **bpftool** **map peek** *MAP* +| **bpftool** **map push** *MAP* **value** *VALUE* +| **bpftool** **map pop** *MAP* +| **bpftool** **map enqueue** *MAP* **value** *VALUE* +| **bpftool** **map dequeue** *MAP* | **bpftool** **map help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } @@ -62,7 +67,7 @@ DESCRIPTION **bpftool map dump** *MAP* Dump all entries in a given *MAP*. - **bpftool map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*] + **bpftool map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] Update map entry for a given *KEY*. *UPDATE_FLAGS* can be one of: **any** update existing entry @@ -75,7 +80,7 @@ DESCRIPTION the bytes are parsed as decimal values, unless a "0x" prefix (for hexadecimal) or a "0" prefix (for octal) is provided. - **bpftool map lookup** *MAP* **key** *DATA* + **bpftool map lookup** *MAP* [**key** *DATA*] Lookup **key** in the map. **bpftool map getnext** *MAP* [**key** *DATA*] @@ -107,6 +112,21 @@ DESCRIPTION replace any existing ring. Any other application will stop receiving events if it installed its rings earlier. + **bpftool map peek** *MAP* + Peek next **value** in the queue or stack. + + **bpftool map push** *MAP* **value** *VALUE* + Push **value** onto the stack. + + **bpftool map pop** *MAP* + Pop and print **value** from the stack. + + **bpftool map enqueue** *MAP* **value** *VALUE* + Enqueue **value** into the queue. + + **bpftool map dequeue** *MAP* + Dequeue and print **value** from the queue. + **bpftool map help** Print short help message. @@ -236,5 +256,6 @@ SEE ALSO **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8), + **bpftool-feature**\ (8), **bpftool-net**\ (8), **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index ed87c9b619ad..779dab3650ee 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -142,4 +142,5 @@ SEE ALSO **bpftool-prog**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8), + **bpftool-feature**\ (8), **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index f4c5e5538bb8..bca5590a80d0 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -84,4 +84,5 @@ SEE ALSO **bpftool-prog**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8), + **bpftool-feature**\ (8), **bpftool-net**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 58c8369b77dd..13b56102f528 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -258,5 +258,6 @@ SEE ALSO **bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8), + **bpftool-feature**\ (8), **bpftool-net**\ (8), **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index e1677e81ed59..27153bb816ac 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -72,5 +72,6 @@ SEE ALSO **bpftool-prog**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8), + **bpftool-feature**\ (8), **bpftool-net**\ (8), **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 492f0f24e2d3..4ad1f0894d53 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -93,9 +93,16 @@ BFD_SRCS = jit_disasm.c SRCS = $(filter-out $(BFD_SRCS),$(wildcard *.c)) ifeq ($(feature-libbfd),1) + LIBS += -lbfd -ldl -lopcodes +else ifeq ($(feature-libbfd-liberty),1) + LIBS += -lbfd -ldl -lopcodes -liberty +else ifeq ($(feature-libbfd-liberty-z),1) + LIBS += -lbfd -ldl -lopcodes -liberty -lz +endif + +ifneq ($(filter -lbfd,$(LIBS)),) CFLAGS += -DHAVE_LIBBFD_SUPPORT SRCS += $(BFD_SRCS) -LIBS += -lbfd -lopcodes endif OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index e4e4fab1b8c7..763dd12482aa 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -50,14 +50,15 @@ _bpftool_get_map_ids() command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) } -_bpftool_get_perf_map_ids() +# Takes map type and adds matching map ids to the list of suggestions. +_bpftool_get_map_ids_for_type() { + local type="$1" COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \ - command grep -C2 perf_event_array | \ + command grep -C2 "$type" | \ command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) } - _bpftool_get_prog_ids() { COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ @@ -99,15 +100,25 @@ _sysfs_get_netdevs() "$cur" ) ) } -# For bpftool map update: retrieve type of the map to update. -_bpftool_map_update_map_type() +# Retrieve type of the map that we are operating on. +_bpftool_map_guess_map_type() { local keyword ref for (( idx=3; idx < ${#words[@]}-1; idx++ )); do - if [[ ${words[$((idx-2))]} == "update" ]]; then - keyword=${words[$((idx-1))]} - ref=${words[$((idx))]} - fi + case "${words[$((idx-2))]}" in + lookup|update) + keyword=${words[$((idx-1))]} + ref=${words[$((idx))]} + ;; + push) + printf "stack" + return 0 + ;; + enqueue) + printf "queue" + return 0 + ;; + esac done [[ -z $ref ]] && return 0 @@ -119,6 +130,8 @@ _bpftool_map_update_map_type() _bpftool_map_update_get_id() { + local command="$1" + # Is it the map to update, or a map to insert into the map to update? # Search for "value" keyword. local idx value @@ -128,11 +141,24 @@ _bpftool_map_update_get_id() break fi done - [[ $value -eq 0 ]] && _bpftool_get_map_ids && return 0 + if [[ $value -eq 0 ]]; then + case "$command" in + push) + _bpftool_get_map_ids_for_type stack + ;; + enqueue) + _bpftool_get_map_ids_for_type queue + ;; + *) + _bpftool_get_map_ids + ;; + esac + return 0 + fi # Id to complete is for a value. It can be either prog id or map id. This # depends on the type of the map to update. - local type=$(_bpftool_map_update_map_type) + local type=$(_bpftool_map_guess_map_type) case $type in array_of_maps|hash_of_maps) _bpftool_get_map_ids @@ -382,14 +408,28 @@ _bpftool() map) local MAP_TYPE='id pinned' case $command in - show|list|dump) + show|list|dump|peek|pop|dequeue) case $prev in $command) COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) return 0 ;; id) - _bpftool_get_map_ids + case "$command" in + peek) + _bpftool_get_map_ids_for_type stack + _bpftool_get_map_ids_for_type queue + ;; + pop) + _bpftool_get_map_ids_for_type stack + ;; + dequeue) + _bpftool_get_map_ids_for_type queue + ;; + *) + _bpftool_get_map_ids + ;; + esac return 0 ;; *) @@ -447,19 +487,25 @@ _bpftool() COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) ) ;; *) + case $(_bpftool_map_guess_map_type) in + queue|stack) + return 0 + ;; + esac + _bpftool_once_attr 'key' return 0 ;; esac ;; - update) + update|push|enqueue) case $prev in $command) COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) return 0 ;; id) - _bpftool_map_update_get_id + _bpftool_map_update_get_id $command return 0 ;; key) @@ -468,7 +514,7 @@ _bpftool() value) # We can have bytes, or references to a prog or a # map, depending on the type of the map to update. - case $(_bpftool_map_update_map_type) in + case "$(_bpftool_map_guess_map_type)" in array_of_maps|hash_of_maps) local MAP_TYPE='id pinned' COMPREPLY+=( $( compgen -W "$MAP_TYPE" \ @@ -490,6 +536,13 @@ _bpftool() return 0 ;; *) + case $(_bpftool_map_guess_map_type) in + queue|stack) + _bpftool_once_attr 'value' + return 0; + ;; + esac + _bpftool_once_attr 'key' local UPDATE_FLAGS='any exist noexist' for (( idx=3; idx < ${#words[@]}-1; idx++ )); do @@ -508,6 +561,7 @@ _bpftool() return 0 fi done + return 0 ;; esac @@ -527,7 +581,7 @@ _bpftool() return 0 ;; id) - _bpftool_get_perf_map_ids + _bpftool_get_map_ids_for_type perf_event_array return 0 ;; cpu) @@ -546,7 +600,8 @@ _bpftool() *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'delete dump getnext help \ - lookup pin event_pipe show list update create' -- \ + lookup pin event_pipe show list update create \ + peek push enqueue pop dequeue' -- \ "$cur" ) ) ;; esac @@ -624,6 +679,25 @@ _bpftool() ;; esac ;; + feature) + case $command in + probe) + [[ $prev == "dev" ]] && _sysfs_get_netdevs && return 0 + [[ $prev == "prefix" ]] && return 0 + if _bpftool_search_list 'macros'; then + COMPREPLY+=( $( compgen -W 'prefix' -- "$cur" ) ) + else + COMPREPLY+=( $( compgen -W 'macros' -- "$cur" ) ) + fi + _bpftool_one_of_list 'kernel dev' + return 0 + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'help probe' -- "$cur" ) ) + ;; + esac + ;; esac } && complete -F _bpftool bpftool diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 6ba5f567a9d8..e63bce0755eb 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -73,35 +73,104 @@ static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id, return ret; } +static void btf_int128_print(json_writer_t *jw, const void *data, + bool is_plain_text) +{ + /* data points to a __int128 number. + * Suppose + * int128_num = *(__int128 *)data; + * The below formulas shows what upper_num and lower_num represents: + * upper_num = int128_num >> 64; + * lower_num = int128_num & 0xffffffffFFFFFFFFULL; + */ + __u64 upper_num, lower_num; + +#ifdef __BIG_ENDIAN_BITFIELD + upper_num = *(__u64 *)data; + lower_num = *(__u64 *)(data + 8); +#else + upper_num = *(__u64 *)(data + 8); + lower_num = *(__u64 *)data; +#endif + + if (is_plain_text) { + if (upper_num == 0) + jsonw_printf(jw, "0x%llx", lower_num); + else + jsonw_printf(jw, "0x%llx%016llx", upper_num, lower_num); + } else { + if (upper_num == 0) + jsonw_printf(jw, "\"0x%llx\"", lower_num); + else + jsonw_printf(jw, "\"0x%llx%016llx\"", upper_num, lower_num); + } +} + +static void btf_int128_shift(__u64 *print_num, u16 left_shift_bits, + u16 right_shift_bits) +{ + __u64 upper_num, lower_num; + +#ifdef __BIG_ENDIAN_BITFIELD + upper_num = print_num[0]; + lower_num = print_num[1]; +#else + upper_num = print_num[1]; + lower_num = print_num[0]; +#endif + + /* shake out un-needed bits by shift/or operations */ + if (left_shift_bits >= 64) { + upper_num = lower_num << (left_shift_bits - 64); + lower_num = 0; + } else { + upper_num = (upper_num << left_shift_bits) | + (lower_num >> (64 - left_shift_bits)); + lower_num = lower_num << left_shift_bits; + } + + if (right_shift_bits >= 64) { + lower_num = upper_num >> (right_shift_bits - 64); + upper_num = 0; + } else { + lower_num = (lower_num >> right_shift_bits) | + (upper_num << (64 - right_shift_bits)); + upper_num = upper_num >> right_shift_bits; + } + +#ifdef __BIG_ENDIAN_BITFIELD + print_num[0] = upper_num; + print_num[1] = lower_num; +#else + print_num[0] = lower_num; + print_num[1] = upper_num; +#endif +} + static void btf_dumper_bitfield(__u32 nr_bits, __u8 bit_offset, const void *data, json_writer_t *jw, bool is_plain_text) { int left_shift_bits, right_shift_bits; + __u64 print_num[2] = {}; int bytes_to_copy; int bits_to_copy; - __u64 print_num; bits_to_copy = bit_offset + nr_bits; bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy); - print_num = 0; - memcpy(&print_num, data, bytes_to_copy); + memcpy(print_num, data, bytes_to_copy); #if defined(__BIG_ENDIAN_BITFIELD) left_shift_bits = bit_offset; #elif defined(__LITTLE_ENDIAN_BITFIELD) - left_shift_bits = 64 - bits_to_copy; + left_shift_bits = 128 - bits_to_copy; #else #error neither big nor little endian #endif - right_shift_bits = 64 - nr_bits; + right_shift_bits = 128 - nr_bits; - print_num <<= left_shift_bits; - print_num >>= right_shift_bits; - if (is_plain_text) - jsonw_printf(jw, "0x%llx", print_num); - else - jsonw_printf(jw, "%llu", print_num); + btf_int128_shift(print_num, left_shift_bits, right_shift_bits); + btf_int128_print(jw, print_num, is_plain_text); } @@ -113,7 +182,7 @@ static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset, int total_bits_offset; /* bits_offset is at most 7. - * BTF_INT_OFFSET() cannot exceed 64 bits. + * BTF_INT_OFFSET() cannot exceed 128 bits. */ total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type); data += BITS_ROUNDDOWN_BYTES(total_bits_offset); @@ -139,6 +208,11 @@ static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset, return 0; } + if (nr_bits == 128) { + btf_int128_print(jw, data, is_plain_text); + return 0; + } + switch (BTF_INT_ENCODING(*int_type)) { case 0: if (BTF_INT_BITS(*int_type) == 64) diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c index 31f0db41513f..3e21f994f262 100644 --- a/tools/bpf/bpftool/cfg.c +++ b/tools/bpf/bpftool/cfg.c @@ -157,6 +157,11 @@ static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur, return false; } +static bool is_jmp_insn(u8 code) +{ + return BPF_CLASS(code) == BPF_JMP || BPF_CLASS(code) == BPF_JMP32; +} + static bool func_partition_bb_head(struct func_node *func) { struct bpf_insn *cur, *end; @@ -170,7 +175,7 @@ static bool func_partition_bb_head(struct func_node *func) return true; for (; cur <= end; cur++) { - if (BPF_CLASS(cur->code) == BPF_JMP) { + if (is_jmp_insn(cur->code)) { u8 opcode = BPF_OP(cur->code); if (opcode == BPF_EXIT || opcode == BPF_CALL) @@ -296,7 +301,7 @@ static bool func_add_bb_edges(struct func_node *func) e->src = bb; insn = bb->tail; - if (BPF_CLASS(insn->code) != BPF_JMP || + if (!is_jmp_insn(insn->code) || BPF_OP(insn->code) == BPF_EXIT) { e->dst = bb_next(bb); e->flags |= EDGE_FLAG_FALLTHROUGH; diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c new file mode 100644 index 000000000000..d672d9086fff --- /dev/null +++ b/tools/bpf/bpftool/feature.c @@ -0,0 +1,764 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (c) 2019 Netronome Systems, Inc. */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "main.h" + +#ifndef PROC_SUPER_MAGIC +# define PROC_SUPER_MAGIC 0x9fa0 +#endif + +enum probe_component { + COMPONENT_UNSPEC, + COMPONENT_KERNEL, + COMPONENT_DEVICE, +}; + +#define BPF_HELPER_MAKE_ENTRY(name) [BPF_FUNC_ ## name] = "bpf_" # name +static const char * const helper_name[] = { + __BPF_FUNC_MAPPER(BPF_HELPER_MAKE_ENTRY) +}; + +#undef BPF_HELPER_MAKE_ENTRY + +/* Miscellaneous utility functions */ + +static bool check_procfs(void) +{ + struct statfs st_fs; + + if (statfs("/proc", &st_fs) < 0) + return false; + if ((unsigned long)st_fs.f_type != PROC_SUPER_MAGIC) + return false; + + return true; +} + +static void uppercase(char *str, size_t len) +{ + size_t i; + + for (i = 0; i < len && str[i] != '\0'; i++) + str[i] = toupper(str[i]); +} + +/* Printing utility functions */ + +static void +print_bool_feature(const char *feat_name, const char *plain_name, + const char *define_name, bool res, const char *define_prefix) +{ + if (json_output) + jsonw_bool_field(json_wtr, feat_name, res); + else if (define_prefix) + printf("#define %s%sHAVE_%s\n", define_prefix, + res ? "" : "NO_", define_name); + else + printf("%s is %savailable\n", plain_name, res ? "" : "NOT "); +} + +static void print_kernel_option(const char *name, const char *value) +{ + char *endptr; + int res; + + /* No support for C-style ouptut */ + + if (json_output) { + if (!value) { + jsonw_null_field(json_wtr, name); + return; + } + errno = 0; + res = strtol(value, &endptr, 0); + if (!errno && *endptr == '\n') + jsonw_int_field(json_wtr, name, res); + else + jsonw_string_field(json_wtr, name, value); + } else { + if (value) + printf("%s is set to %s\n", name, value); + else + printf("%s is not set\n", name); + } +} + +static void +print_start_section(const char *json_title, const char *plain_title, + const char *define_comment, const char *define_prefix) +{ + if (json_output) { + jsonw_name(json_wtr, json_title); + jsonw_start_object(json_wtr); + } else if (define_prefix) { + printf("%s\n", define_comment); + } else { + printf("%s\n", plain_title); + } +} + +static void +print_end_then_start_section(const char *json_title, const char *plain_title, + const char *define_comment, + const char *define_prefix) +{ + if (json_output) + jsonw_end_object(json_wtr); + else + printf("\n"); + + print_start_section(json_title, plain_title, define_comment, + define_prefix); +} + +/* Probing functions */ + +static int read_procfs(const char *path) +{ + char *endptr, *line = NULL; + size_t len = 0; + FILE *fd; + int res; + + fd = fopen(path, "r"); + if (!fd) + return -1; + + res = getline(&line, &len, fd); + fclose(fd); + if (res < 0) + return -1; + + errno = 0; + res = strtol(line, &endptr, 10); + if (errno || *line == '\0' || *endptr != '\n') + res = -1; + free(line); + + return res; +} + +static void probe_unprivileged_disabled(void) +{ + int res; + + /* No support for C-style ouptut */ + + res = read_procfs("/proc/sys/kernel/unprivileged_bpf_disabled"); + if (json_output) { + jsonw_int_field(json_wtr, "unprivileged_bpf_disabled", res); + } else { + switch (res) { + case 0: + printf("bpf() syscall for unprivileged users is enabled\n"); + break; + case 1: + printf("bpf() syscall restricted to privileged users\n"); + break; + case -1: + printf("Unable to retrieve required privileges for bpf() syscall\n"); + break; + default: + printf("bpf() syscall restriction has unknown value %d\n", res); + } + } +} + +static void probe_jit_enable(void) +{ + int res; + + /* No support for C-style ouptut */ + + res = read_procfs("/proc/sys/net/core/bpf_jit_enable"); + if (json_output) { + jsonw_int_field(json_wtr, "bpf_jit_enable", res); + } else { + switch (res) { + case 0: + printf("JIT compiler is disabled\n"); + break; + case 1: + printf("JIT compiler is enabled\n"); + break; + case 2: + printf("JIT compiler is enabled with debugging traces in kernel logs\n"); + break; + case -1: + printf("Unable to retrieve JIT-compiler status\n"); + break; + default: + printf("JIT-compiler status has unknown value %d\n", + res); + } + } +} + +static void probe_jit_harden(void) +{ + int res; + + /* No support for C-style ouptut */ + + res = read_procfs("/proc/sys/net/core/bpf_jit_harden"); + if (json_output) { + jsonw_int_field(json_wtr, "bpf_jit_harden", res); + } else { + switch (res) { + case 0: + printf("JIT compiler hardening is disabled\n"); + break; + case 1: + printf("JIT compiler hardening is enabled for unprivileged users\n"); + break; + case 2: + printf("JIT compiler hardening is enabled for all users\n"); + break; + case -1: + printf("Unable to retrieve JIT hardening status\n"); + break; + default: + printf("JIT hardening status has unknown value %d\n", + res); + } + } +} + +static void probe_jit_kallsyms(void) +{ + int res; + + /* No support for C-style ouptut */ + + res = read_procfs("/proc/sys/net/core/bpf_jit_kallsyms"); + if (json_output) { + jsonw_int_field(json_wtr, "bpf_jit_kallsyms", res); + } else { + switch (res) { + case 0: + printf("JIT compiler kallsyms exports are disabled\n"); + break; + case 1: + printf("JIT compiler kallsyms exports are enabled for root\n"); + break; + case -1: + printf("Unable to retrieve JIT kallsyms export status\n"); + break; + default: + printf("JIT kallsyms exports status has unknown value %d\n", res); + } + } +} + +static void probe_jit_limit(void) +{ + int res; + + /* No support for C-style ouptut */ + + res = read_procfs("/proc/sys/net/core/bpf_jit_limit"); + if (json_output) { + jsonw_int_field(json_wtr, "bpf_jit_limit", res); + } else { + switch (res) { + case -1: + printf("Unable to retrieve global memory limit for JIT compiler for unprivileged users\n"); + break; + default: + printf("Global memory limit for JIT compiler for unprivileged users is %d bytes\n", res); + } + } +} + +static char *get_kernel_config_option(FILE *fd, const char *option) +{ + size_t line_n = 0, optlen = strlen(option); + char *res, *strval, *line = NULL; + ssize_t n; + + rewind(fd); + while ((n = getline(&line, &line_n, fd)) > 0) { + if (strncmp(line, option, optlen)) + continue; + /* Check we have at least '=', value, and '\n' */ + if (strlen(line) < optlen + 3) + continue; + if (*(line + optlen) != '=') + continue; + + /* Trim ending '\n' */ + line[strlen(line) - 1] = '\0'; + + /* Copy and return config option value */ + strval = line + optlen + 1; + res = strdup(strval); + free(line); + return res; + } + free(line); + + return NULL; +} + +static void probe_kernel_image_config(void) +{ + static const char * const options[] = { + /* Enable BPF */ + "CONFIG_BPF", + /* Enable bpf() syscall */ + "CONFIG_BPF_SYSCALL", + /* Does selected architecture support eBPF JIT compiler */ + "CONFIG_HAVE_EBPF_JIT", + /* Compile eBPF JIT compiler */ + "CONFIG_BPF_JIT", + /* Avoid compiling eBPF interpreter (use JIT only) */ + "CONFIG_BPF_JIT_ALWAYS_ON", + + /* cgroups */ + "CONFIG_CGROUPS", + /* BPF programs attached to cgroups */ + "CONFIG_CGROUP_BPF", + /* bpf_get_cgroup_classid() helper */ + "CONFIG_CGROUP_NET_CLASSID", + /* bpf_skb_{,ancestor_}cgroup_id() helpers */ + "CONFIG_SOCK_CGROUP_DATA", + + /* Tracing: attach BPF to kprobes, tracepoints, etc. */ + "CONFIG_BPF_EVENTS", + /* Kprobes */ + "CONFIG_KPROBE_EVENTS", + /* Uprobes */ + "CONFIG_UPROBE_EVENTS", + /* Tracepoints */ + "CONFIG_TRACING", + /* Syscall tracepoints */ + "CONFIG_FTRACE_SYSCALLS", + /* bpf_override_return() helper support for selected arch */ + "CONFIG_FUNCTION_ERROR_INJECTION", + /* bpf_override_return() helper */ + "CONFIG_BPF_KPROBE_OVERRIDE", + + /* Network */ + "CONFIG_NET", + /* AF_XDP sockets */ + "CONFIG_XDP_SOCKETS", + /* BPF_PROG_TYPE_LWT_* and related helpers */ + "CONFIG_LWTUNNEL_BPF", + /* BPF_PROG_TYPE_SCHED_ACT, TC (traffic control) actions */ + "CONFIG_NET_ACT_BPF", + /* BPF_PROG_TYPE_SCHED_CLS, TC filters */ + "CONFIG_NET_CLS_BPF", + /* TC clsact qdisc */ + "CONFIG_NET_CLS_ACT", + /* Ingress filtering with TC */ + "CONFIG_NET_SCH_INGRESS", + /* bpf_skb_get_xfrm_state() helper */ + "CONFIG_XFRM", + /* bpf_get_route_realm() helper */ + "CONFIG_IP_ROUTE_CLASSID", + /* BPF_PROG_TYPE_LWT_SEG6_LOCAL and related helpers */ + "CONFIG_IPV6_SEG6_BPF", + /* BPF_PROG_TYPE_LIRC_MODE2 and related helpers */ + "CONFIG_BPF_LIRC_MODE2", + /* BPF stream parser and BPF socket maps */ + "CONFIG_BPF_STREAM_PARSER", + /* xt_bpf module for passing BPF programs to netfilter */ + "CONFIG_NETFILTER_XT_MATCH_BPF", + /* bpfilter back-end for iptables */ + "CONFIG_BPFILTER", + /* bpftilter module with "user mode helper" */ + "CONFIG_BPFILTER_UMH", + + /* test_bpf module for BPF tests */ + "CONFIG_TEST_BPF", + }; + char *value, *buf = NULL; + struct utsname utsn; + char path[PATH_MAX]; + size_t i, n; + ssize_t ret; + FILE *fd; + + if (uname(&utsn)) + goto no_config; + + snprintf(path, sizeof(path), "/boot/config-%s", utsn.release); + + fd = fopen(path, "r"); + if (!fd && errno == ENOENT) { + /* Some distributions put the config file at /proc/config, give + * it a try. + * Sometimes it is also at /proc/config.gz but we do not try + * this one for now, it would require linking against libz. + */ + fd = fopen("/proc/config", "r"); + } + if (!fd) { + p_info("skipping kernel config, can't open file: %s", + strerror(errno)); + goto no_config; + } + /* Sanity checks */ + ret = getline(&buf, &n, fd); + ret = getline(&buf, &n, fd); + if (!buf || !ret) { + p_info("skipping kernel config, can't read from file: %s", + strerror(errno)); + free(buf); + goto no_config; + } + if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) { + p_info("skipping kernel config, can't find correct file"); + free(buf); + goto no_config; + } + free(buf); + + for (i = 0; i < ARRAY_SIZE(options); i++) { + value = get_kernel_config_option(fd, options[i]); + print_kernel_option(options[i], value); + free(value); + } + fclose(fd); + return; + +no_config: + for (i = 0; i < ARRAY_SIZE(options); i++) + print_kernel_option(options[i], NULL); +} + +static bool probe_bpf_syscall(const char *define_prefix) +{ + bool res; + + bpf_load_program(BPF_PROG_TYPE_UNSPEC, NULL, 0, NULL, 0, NULL, 0); + res = (errno != ENOSYS); + + print_bool_feature("have_bpf_syscall", + "bpf() syscall", + "BPF_SYSCALL", + res, define_prefix); + + return res; +} + +static void +probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, + const char *define_prefix, __u32 ifindex) +{ + char feat_name[128], plain_desc[128], define_name[128]; + const char *plain_comment = "eBPF program_type "; + size_t maxlen; + bool res; + + if (ifindex) + /* Only test offload-able program types */ + switch (prog_type) { + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_XDP: + break; + default: + return; + } + + res = bpf_probe_prog_type(prog_type, ifindex); + + supported_types[prog_type] |= res; + + maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1; + if (strlen(prog_type_name[prog_type]) > maxlen) { + p_info("program type name too long"); + return; + } + + sprintf(feat_name, "have_%s_prog_type", prog_type_name[prog_type]); + sprintf(define_name, "%s_prog_type", prog_type_name[prog_type]); + uppercase(define_name, sizeof(define_name)); + sprintf(plain_desc, "%s%s", plain_comment, prog_type_name[prog_type]); + print_bool_feature(feat_name, plain_desc, define_name, res, + define_prefix); +} + +static void +probe_map_type(enum bpf_map_type map_type, const char *define_prefix, + __u32 ifindex) +{ + char feat_name[128], plain_desc[128], define_name[128]; + const char *plain_comment = "eBPF map_type "; + size_t maxlen; + bool res; + + res = bpf_probe_map_type(map_type, ifindex); + + maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1; + if (strlen(map_type_name[map_type]) > maxlen) { + p_info("map type name too long"); + return; + } + + sprintf(feat_name, "have_%s_map_type", map_type_name[map_type]); + sprintf(define_name, "%s_map_type", map_type_name[map_type]); + uppercase(define_name, sizeof(define_name)); + sprintf(plain_desc, "%s%s", plain_comment, map_type_name[map_type]); + print_bool_feature(feat_name, plain_desc, define_name, res, + define_prefix); +} + +static void +probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, + const char *define_prefix, __u32 ifindex) +{ + const char *ptype_name = prog_type_name[prog_type]; + char feat_name[128]; + unsigned int id; + bool res; + + if (ifindex) + /* Only test helpers for offload-able program types */ + switch (prog_type) { + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_XDP: + break; + default: + return; + } + + if (json_output) { + sprintf(feat_name, "%s_available_helpers", ptype_name); + jsonw_name(json_wtr, feat_name); + jsonw_start_array(json_wtr); + } else if (!define_prefix) { + printf("eBPF helpers supported for program type %s:", + ptype_name); + } + + for (id = 1; id < ARRAY_SIZE(helper_name); id++) { + if (!supported_type) + res = false; + else + res = bpf_probe_helper(id, prog_type, ifindex); + + if (json_output) { + if (res) + jsonw_string(json_wtr, helper_name[id]); + } else if (define_prefix) { + printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n", + define_prefix, ptype_name, helper_name[id], + res ? "1" : "0"); + } else { + if (res) + printf("\n\t- %s", helper_name[id]); + } + } + + if (json_output) + jsonw_end_array(json_wtr); + else if (!define_prefix) + printf("\n"); +} + +static int do_probe(int argc, char **argv) +{ + enum probe_component target = COMPONENT_UNSPEC; + const char *define_prefix = NULL; + bool supported_types[128] = {}; + __u32 ifindex = 0; + unsigned int i; + char *ifname; + + /* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN). + * Let's approximate, and restrict usage to root user only. + */ + if (geteuid()) { + p_err("please run this command as root user"); + return -1; + } + + set_max_rlimit(); + + while (argc) { + if (is_prefix(*argv, "kernel")) { + if (target != COMPONENT_UNSPEC) { + p_err("component to probe already specified"); + return -1; + } + target = COMPONENT_KERNEL; + NEXT_ARG(); + } else if (is_prefix(*argv, "dev")) { + NEXT_ARG(); + + if (target != COMPONENT_UNSPEC || ifindex) { + p_err("component to probe already specified"); + return -1; + } + if (!REQ_ARGS(1)) + return -1; + + target = COMPONENT_DEVICE; + ifname = GET_ARG(); + ifindex = if_nametoindex(ifname); + if (!ifindex) { + p_err("unrecognized netdevice '%s': %s", ifname, + strerror(errno)); + return -1; + } + } else if (is_prefix(*argv, "macros") && !define_prefix) { + define_prefix = ""; + NEXT_ARG(); + } else if (is_prefix(*argv, "prefix")) { + if (!define_prefix) { + p_err("'prefix' argument can only be use after 'macros'"); + return -1; + } + if (strcmp(define_prefix, "")) { + p_err("'prefix' already defined"); + return -1; + } + NEXT_ARG(); + + if (!REQ_ARGS(1)) + return -1; + define_prefix = GET_ARG(); + } else { + p_err("expected no more arguments, 'kernel', 'dev', 'macros' or 'prefix', got: '%s'?", + *argv); + return -1; + } + } + + if (json_output) { + define_prefix = NULL; + jsonw_start_object(json_wtr); + } + + switch (target) { + case COMPONENT_KERNEL: + case COMPONENT_UNSPEC: + if (define_prefix) + break; + + print_start_section("system_config", + "Scanning system configuration...", + NULL, /* define_comment never used here */ + NULL); /* define_prefix always NULL here */ + if (check_procfs()) { + probe_unprivileged_disabled(); + probe_jit_enable(); + probe_jit_harden(); + probe_jit_kallsyms(); + probe_jit_limit(); + } else { + p_info("/* procfs not mounted, skipping related probes */"); + } + probe_kernel_image_config(); + if (json_output) + jsonw_end_object(json_wtr); + else + printf("\n"); + break; + default: + break; + } + + print_start_section("syscall_config", + "Scanning system call availability...", + "/*** System call availability ***/", + define_prefix); + + if (!probe_bpf_syscall(define_prefix)) + /* bpf() syscall unavailable, don't probe other BPF features */ + goto exit_close_json; + + print_end_then_start_section("program_types", + "Scanning eBPF program types...", + "/*** eBPF program types ***/", + define_prefix); + + for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) + probe_prog_type(i, supported_types, define_prefix, ifindex); + + print_end_then_start_section("map_types", + "Scanning eBPF map types...", + "/*** eBPF map types ***/", + define_prefix); + + for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++) + probe_map_type(i, define_prefix, ifindex); + + print_end_then_start_section("helpers", + "Scanning eBPF helper functions...", + "/*** eBPF helper functions ***/", + define_prefix); + + if (define_prefix) + printf("/*\n" + " * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n" + " * to determine if is available for ,\n" + " * e.g.\n" + " * #if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n" + " * // do stuff with this helper\n" + " * #elif\n" + " * // use a workaround\n" + " * #endif\n" + " */\n" + "#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper) \\\n" + " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n", + define_prefix, define_prefix, define_prefix, + define_prefix); + for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) + probe_helpers_for_progtype(i, supported_types[i], + define_prefix, ifindex); + +exit_close_json: + if (json_output) { + /* End current "section" of probes */ + jsonw_end_object(json_wtr); + /* End root object */ + jsonw_end_object(json_wtr); + } + + return 0; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s %s probe [COMPONENT] [macros [prefix PREFIX]]\n" + " %s %s help\n" + "\n" + " COMPONENT := { kernel | dev NAME }\n" + "", + bin_name, argv[-2], bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "probe", do_probe }, + { "help", do_help }, + { 0 } +}; + +int do_feature(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c index bff7ee026680..6046dcab51cc 100644 --- a/tools/bpf/bpftool/json_writer.c +++ b/tools/bpf/bpftool/json_writer.c @@ -1,15 +1,10 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) /* * Simple streaming JSON writer * * This takes care of the annoying bits of JSON syntax like the commas * after elements * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Stephen Hemminger */ diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h index c1ab51aed99c..cb9a1993681c 100644 --- a/tools/bpf/bpftool/json_writer.h +++ b/tools/bpf/bpftool/json_writer.h @@ -5,11 +5,6 @@ * This takes care of the annoying bits of JSON syntax like the commas * after elements * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * * Authors: Stephen Hemminger */ diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index f44a1c2c4ea0..a9d5e9e6a732 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -56,7 +56,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | cgroup | perf | net }\n" + " OBJECT := { prog | map | cgroup | perf | net | feature }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -187,6 +187,7 @@ static const struct cmd cmds[] = { { "cgroup", do_cgroup }, { "perf", do_perf }, { "net", do_net }, + { "feature", do_feature }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 052c91d4dc55..d7dd84d3c660 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -75,6 +75,9 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", }; +extern const char * const map_type_name[]; +extern const size_t map_type_name_size; + enum bpf_obj_type { BPF_OBJ_UNKNOWN, BPF_OBJ_PROG, @@ -145,6 +148,7 @@ int do_cgroup(int argc, char **arg); int do_perf(int argc, char **arg); int do_net(int argc, char **arg); int do_tracelog(int argc, char **arg); +int do_feature(int argc, char **argv); int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 2037e3dc864b..2160a8ef17e5 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -21,7 +21,7 @@ #include "json_writer.h" #include "main.h" -static const char * const map_type_name[] = { +const char * const map_type_name[] = { [BPF_MAP_TYPE_UNSPEC] = "unspec", [BPF_MAP_TYPE_HASH] = "hash", [BPF_MAP_TYPE_ARRAY] = "array", @@ -48,6 +48,8 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_STACK] = "stack", }; +const size_t map_type_name_size = ARRAY_SIZE(map_type_name); + static bool map_is_per_cpu(__u32 type) { return type == BPF_MAP_TYPE_PERCPU_HASH || @@ -285,16 +287,21 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, single_line = info->key_size + info->value_size <= 24 && !break_names; - printf("key:%c", break_names ? '\n' : ' '); - fprint_hex(stdout, key, info->key_size, " "); + if (info->key_size) { + printf("key:%c", break_names ? '\n' : ' '); + fprint_hex(stdout, key, info->key_size, " "); - printf(single_line ? " " : "\n"); + printf(single_line ? " " : "\n"); + } - printf("value:%c", break_names ? '\n' : ' '); - if (value) - fprint_hex(stdout, value, info->value_size, " "); - else - printf(""); + if (info->value_size) { + printf("value:%c", break_names ? '\n' : ' '); + if (value) + fprint_hex(stdout, value, info->value_size, + " "); + else + printf(""); + } printf("\n"); } else { @@ -303,19 +310,23 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, n = get_possible_cpus(); step = round_up(info->value_size, 8); - printf("key:\n"); - fprint_hex(stdout, key, info->key_size, " "); - printf("\n"); - for (i = 0; i < n; i++) { - printf("value (CPU %02d):%c", - i, info->value_size > 16 ? '\n' : ' '); - if (value) - fprint_hex(stdout, value + i * step, - info->value_size, " "); - else - printf(""); + if (info->key_size) { + printf("key:\n"); + fprint_hex(stdout, key, info->key_size, " "); printf("\n"); } + if (info->value_size) { + for (i = 0; i < n; i++) { + printf("value (CPU %02d):%c", + i, info->value_size > 16 ? '\n' : ' '); + if (value) + fprint_hex(stdout, value + i * step, + info->value_size, " "); + else + printf(""); + printf("\n"); + } + } } } @@ -415,6 +426,9 @@ static int parse_elem(char **argv, struct bpf_map_info *info, p_err("not enough value arguments for map of progs"); return -1; } + if (is_prefix(*argv, "id")) + p_info("Warning: updating program array via MAP_ID, make sure this map is kept open\n" + " by some process or pinned otherwise update will be lost"); fd = prog_parse_fd(&argc, &argv); if (fd < 0) @@ -779,6 +793,32 @@ exit_free: return err; } +static int alloc_key_value(struct bpf_map_info *info, void **key, void **value) +{ + *key = NULL; + *value = NULL; + + if (info->key_size) { + *key = malloc(info->key_size); + if (!*key) { + p_err("key mem alloc failed"); + return -1; + } + } + + if (info->value_size) { + *value = alloc_value(info); + if (!*value) { + p_err("value mem alloc failed"); + free(*key); + *key = NULL; + return -1; + } + } + + return 0; +} + static int do_update(int argc, char **argv) { struct bpf_map_info info = {}; @@ -795,13 +835,9 @@ static int do_update(int argc, char **argv) if (fd < 0) return -1; - key = malloc(info.key_size); - value = alloc_value(&info); - if (!key || !value) { - p_err("mem alloc failed"); - err = -1; + err = alloc_key_value(&info, &key, &value); + if (err) goto exit_free; - } err = parse_elem(argv, &info, key, value, info.key_size, info.value_size, &flags, &value_fd); @@ -826,12 +862,51 @@ exit_free: return err; } +static void print_key_value(struct bpf_map_info *info, void *key, + void *value) +{ + json_writer_t *btf_wtr; + struct btf *btf = NULL; + int err; + + err = btf__get_from_id(info->btf_id, &btf); + if (err) { + p_err("failed to get btf"); + return; + } + + if (json_output) { + print_entry_json(info, key, value, btf); + } else if (btf) { + /* if here json_wtr wouldn't have been initialised, + * so let's create separate writer for btf + */ + btf_wtr = get_btf_writer(); + if (!btf_wtr) { + p_info("failed to create json writer for btf. falling back to plain output"); + btf__free(btf); + btf = NULL; + print_entry_plain(info, key, value); + } else { + struct btf_dumper d = { + .btf = btf, + .jw = btf_wtr, + .is_plain_text = true, + }; + + do_dump_btf(&d, info, key, value); + jsonw_destroy(&btf_wtr); + } + } else { + print_entry_plain(info, key, value); + } + btf__free(btf); +} + static int do_lookup(int argc, char **argv) { struct bpf_map_info info = {}; __u32 len = sizeof(info); - json_writer_t *btf_wtr; - struct btf *btf = NULL; void *key, *value; int err; int fd; @@ -843,13 +918,9 @@ static int do_lookup(int argc, char **argv) if (fd < 0) return -1; - key = malloc(info.key_size); - value = alloc_value(&info); - if (!key || !value) { - p_err("mem alloc failed"); - err = -1; + err = alloc_key_value(&info, &key, &value); + if (err) goto exit_free; - } err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); if (err) @@ -873,43 +944,12 @@ static int do_lookup(int argc, char **argv) } /* here means bpf_map_lookup_elem() succeeded */ - err = btf__get_from_id(info.btf_id, &btf); - if (err) { - p_err("failed to get btf"); - goto exit_free; - } - - if (json_output) { - print_entry_json(&info, key, value, btf); - } else if (btf) { - /* if here json_wtr wouldn't have been initialised, - * so let's create separate writer for btf - */ - btf_wtr = get_btf_writer(); - if (!btf_wtr) { - p_info("failed to create json writer for btf. falling back to plain output"); - btf__free(btf); - btf = NULL; - print_entry_plain(&info, key, value); - } else { - struct btf_dumper d = { - .btf = btf, - .jw = btf_wtr, - .is_plain_text = true, - }; - - do_dump_btf(&d, &info, key, value); - jsonw_destroy(&btf_wtr); - } - } else { - print_entry_plain(&info, key, value); - } + print_key_value(&info, key, value); exit_free: free(key); free(value); close(fd); - btf__free(btf); return err; } @@ -1122,6 +1162,49 @@ static int do_create(int argc, char **argv) return 0; } +static int do_pop_dequeue(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + void *key, *value; + int err; + int fd; + + if (argc < 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + err = alloc_key_value(&info, &key, &value); + if (err) + goto exit_free; + + err = bpf_map_lookup_and_delete_elem(fd, key, value); + if (err) { + if (errno == ENOENT) { + if (json_output) + jsonw_null(json_wtr); + else + printf("Error: empty map\n"); + } else { + p_err("pop failed: %s", strerror(errno)); + } + + goto exit_free; + } + + print_key_value(&info, key, value); + +exit_free: + free(key); + free(value); + close(fd); + + return err; +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -1135,12 +1218,17 @@ static int do_help(int argc, char **argv) " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n" " [dev NAME]\n" " %s %s dump MAP\n" - " %s %s update MAP key DATA value VALUE [UPDATE_FLAGS]\n" - " %s %s lookup MAP key DATA\n" + " %s %s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n" + " %s %s lookup MAP [key DATA]\n" " %s %s getnext MAP [key DATA]\n" " %s %s delete MAP key DATA\n" " %s %s pin MAP FILE\n" " %s %s event_pipe MAP [cpu N index M]\n" + " %s %s peek MAP\n" + " %s %s push MAP value VALUE\n" + " %s %s pop MAP\n" + " %s %s enqueue MAP value VALUE\n" + " %s %s dequeue MAP\n" " %s %s help\n" "\n" " " HELP_SPEC_MAP "\n" @@ -1158,7 +1246,8 @@ static int do_help(int argc, char **argv) bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); return 0; } @@ -1175,6 +1264,11 @@ static const struct cmd cmds[] = { { "pin", do_pin }, { "event_pipe", do_event_pipe }, { "create", do_create }, + { "peek", do_lookup }, + { "push", do_update }, + { "enqueue", do_update }, + { "pop", do_pop_dequeue }, + { "dequeue", do_pop_dequeue }, { 0 } }; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2d1bb7d6ff51..0640e9bc0ada 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -930,10 +930,9 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) err = libbpf_prog_type_by_name(type, &attr.prog_type, &expected_attach_type); free(type); - if (err < 0) { - p_err("unknown program type '%s'", *argv); + if (err < 0) goto err_free_reuse_maps; - } + NEXT_ARG(); } else if (is_prefix(*argv, "map")) { void *new_map_replace; @@ -1028,11 +1027,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) err = libbpf_prog_type_by_name(sec_name, &prog_type, &expected_attach_type); - if (err < 0) { - p_err("failed to guess program type based on section name %s\n", - sec_name); + if (err < 0) goto err_close_obj; - } } bpf_program__set_ifindex(pos, ifindex); diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index af55acf73e75..cce0b02c0e28 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -199,6 +199,16 @@ .off = OFF, \ .imm = 0 }) +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ @@ -209,6 +219,16 @@ .off = OFF, \ .imm = IMM }) +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Unconditional jumps, goto pc + off16 */ #define BPF_JMP_A(OFF) \ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 91c43884f295..60b99b730a41 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -14,6 +14,7 @@ /* Extended instruction set based on top of classic BPF */ /* instruction classes */ +#define BPF_JMP32 0x06 /* jmp mode in word width */ #define BPF_ALU64 0x07 /* alu mode in double word width */ /* ld/ldx fields */ @@ -2540,6 +2541,7 @@ struct __sk_buff { __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); __u64 tstamp; __u32 wire_len; + __u32 gso_segs; }; struct bpf_tunnel_key { diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h new file mode 100644 index 000000000000..0d18b1d1fbbc --- /dev/null +++ b/tools/include/uapi/linux/pkt_sched.h @@ -0,0 +1,1163 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_PKT_SCHED_H +#define __LINUX_PKT_SCHED_H + +#include + +/* Logical priority bands not depending on specific packet scheduler. + Every scheduler will map them to real traffic classes, if it has + no more precise mechanism to classify packets. + + These numbers have no special meaning, though their coincidence + with obsolete IPv6 values is not occasional :-). New IPv6 drafts + preferred full anarchy inspired by diffserv group. + + Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy + class, actually, as rule it will be handled with more care than + filler or even bulk. + */ + +#define TC_PRIO_BESTEFFORT 0 +#define TC_PRIO_FILLER 1 +#define TC_PRIO_BULK 2 +#define TC_PRIO_INTERACTIVE_BULK 4 +#define TC_PRIO_INTERACTIVE 6 +#define TC_PRIO_CONTROL 7 + +#define TC_PRIO_MAX 15 + +/* Generic queue statistics, available for all the elements. + Particular schedulers may have also their private records. + */ + +struct tc_stats { + __u64 bytes; /* Number of enqueued bytes */ + __u32 packets; /* Number of enqueued packets */ + __u32 drops; /* Packets dropped because of lack of resources */ + __u32 overlimits; /* Number of throttle events when this + * flow goes out of allocated bandwidth */ + __u32 bps; /* Current flow byte rate */ + __u32 pps; /* Current flow packet rate */ + __u32 qlen; + __u32 backlog; +}; + +struct tc_estimator { + signed char interval; + unsigned char ewma_log; +}; + +/* "Handles" + --------- + + All the traffic control objects have 32bit identifiers, or "handles". + + They can be considered as opaque numbers from user API viewpoint, + but actually they always consist of two fields: major and + minor numbers, which are interpreted by kernel specially, + that may be used by applications, though not recommended. + + F.e. qdisc handles always have minor number equal to zero, + classes (or flows) have major equal to parent qdisc major, and + minor uniquely identifying class inside qdisc. + + Macros to manipulate handles: + */ + +#define TC_H_MAJ_MASK (0xFFFF0000U) +#define TC_H_MIN_MASK (0x0000FFFFU) +#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK) +#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK) +#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK)) + +#define TC_H_UNSPEC (0U) +#define TC_H_ROOT (0xFFFFFFFFU) +#define TC_H_INGRESS (0xFFFFFFF1U) +#define TC_H_CLSACT TC_H_INGRESS + +#define TC_H_MIN_PRIORITY 0xFFE0U +#define TC_H_MIN_INGRESS 0xFFF2U +#define TC_H_MIN_EGRESS 0xFFF3U + +/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */ +enum tc_link_layer { + TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */ + TC_LINKLAYER_ETHERNET, + TC_LINKLAYER_ATM, +}; +#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */ + +struct tc_ratespec { + unsigned char cell_log; + __u8 linklayer; /* lower 4 bits */ + unsigned short overhead; + short cell_align; + unsigned short mpu; + __u32 rate; +}; + +#define TC_RTAB_SIZE 1024 + +struct tc_sizespec { + unsigned char cell_log; + unsigned char size_log; + short cell_align; + int overhead; + unsigned int linklayer; + unsigned int mpu; + unsigned int mtu; + unsigned int tsize; +}; + +enum { + TCA_STAB_UNSPEC, + TCA_STAB_BASE, + TCA_STAB_DATA, + __TCA_STAB_MAX +}; + +#define TCA_STAB_MAX (__TCA_STAB_MAX - 1) + +/* FIFO section */ + +struct tc_fifo_qopt { + __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ +}; + +/* SKBPRIO section */ + +/* + * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1). + * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able + * to map one to one the DS field of IPV4 and IPV6 headers. + * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY. + */ + +#define SKBPRIO_MAX_PRIORITY 64 + +struct tc_skbprio_qopt { + __u32 limit; /* Queue length in packets. */ +}; + +/* PRIO section */ + +#define TCQ_PRIO_BANDS 16 +#define TCQ_MIN_PRIO_BANDS 2 + +struct tc_prio_qopt { + int bands; /* Number of bands */ + __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ +}; + +/* MULTIQ section */ + +struct tc_multiq_qopt { + __u16 bands; /* Number of bands */ + __u16 max_bands; /* Maximum number of queues */ +}; + +/* PLUG section */ + +#define TCQ_PLUG_BUFFER 0 +#define TCQ_PLUG_RELEASE_ONE 1 +#define TCQ_PLUG_RELEASE_INDEFINITE 2 +#define TCQ_PLUG_LIMIT 3 + +struct tc_plug_qopt { + /* TCQ_PLUG_BUFFER: Inset a plug into the queue and + * buffer any incoming packets + * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head + * to beginning of the next plug. + * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue. + * Stop buffering packets until the next TCQ_PLUG_BUFFER + * command is received (just act as a pass-thru queue). + * TCQ_PLUG_LIMIT: Increase/decrease queue size + */ + int action; + __u32 limit; +}; + +/* TBF section */ + +struct tc_tbf_qopt { + struct tc_ratespec rate; + struct tc_ratespec peakrate; + __u32 limit; + __u32 buffer; + __u32 mtu; +}; + +enum { + TCA_TBF_UNSPEC, + TCA_TBF_PARMS, + TCA_TBF_RTAB, + TCA_TBF_PTAB, + TCA_TBF_RATE64, + TCA_TBF_PRATE64, + TCA_TBF_BURST, + TCA_TBF_PBURST, + TCA_TBF_PAD, + __TCA_TBF_MAX, +}; + +#define TCA_TBF_MAX (__TCA_TBF_MAX - 1) + + +/* TEQL section */ + +/* TEQL does not require any parameters */ + +/* SFQ section */ + +struct tc_sfq_qopt { + unsigned quantum; /* Bytes per round allocated to flow */ + int perturb_period; /* Period of hash perturbation */ + __u32 limit; /* Maximal packets in queue */ + unsigned divisor; /* Hash divisor */ + unsigned flows; /* Maximal number of flows */ +}; + +struct tc_sfqred_stats { + __u32 prob_drop; /* Early drops, below max threshold */ + __u32 forced_drop; /* Early drops, after max threshold */ + __u32 prob_mark; /* Marked packets, below max threshold */ + __u32 forced_mark; /* Marked packets, after max threshold */ + __u32 prob_mark_head; /* Marked packets, below max threshold */ + __u32 forced_mark_head;/* Marked packets, after max threshold */ +}; + +struct tc_sfq_qopt_v1 { + struct tc_sfq_qopt v0; + unsigned int depth; /* max number of packets per flow */ + unsigned int headdrop; +/* SFQRED parameters */ + __u32 limit; /* HARD maximal flow queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; + __u32 max_P; /* probability, high resolution */ +/* SFQRED stats */ + struct tc_sfqred_stats stats; +}; + + +struct tc_sfq_xstats { + __s32 allot; +}; + +/* RED section */ + +enum { + TCA_RED_UNSPEC, + TCA_RED_PARMS, + TCA_RED_STAB, + TCA_RED_MAX_P, + __TCA_RED_MAX, +}; + +#define TCA_RED_MAX (__TCA_RED_MAX - 1) + +struct tc_red_qopt { + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; +#define TC_RED_ECN 1 +#define TC_RED_HARDDROP 2 +#define TC_RED_ADAPTATIVE 4 +}; + +struct tc_red_xstats { + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ +}; + +/* GRED section */ + +#define MAX_DPs 16 + +enum { + TCA_GRED_UNSPEC, + TCA_GRED_PARMS, + TCA_GRED_STAB, + TCA_GRED_DPS, + TCA_GRED_MAX_P, + TCA_GRED_LIMIT, + TCA_GRED_VQ_LIST, /* nested TCA_GRED_VQ_ENTRY */ + __TCA_GRED_MAX, +}; + +#define TCA_GRED_MAX (__TCA_GRED_MAX - 1) + +enum { + TCA_GRED_VQ_ENTRY_UNSPEC, + TCA_GRED_VQ_ENTRY, /* nested TCA_GRED_VQ_* */ + __TCA_GRED_VQ_ENTRY_MAX, +}; +#define TCA_GRED_VQ_ENTRY_MAX (__TCA_GRED_VQ_ENTRY_MAX - 1) + +enum { + TCA_GRED_VQ_UNSPEC, + TCA_GRED_VQ_PAD, + TCA_GRED_VQ_DP, /* u32 */ + TCA_GRED_VQ_STAT_BYTES, /* u64 */ + TCA_GRED_VQ_STAT_PACKETS, /* u32 */ + TCA_GRED_VQ_STAT_BACKLOG, /* u32 */ + TCA_GRED_VQ_STAT_PROB_DROP, /* u32 */ + TCA_GRED_VQ_STAT_PROB_MARK, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_DROP, /* u32 */ + TCA_GRED_VQ_STAT_FORCED_MARK, /* u32 */ + TCA_GRED_VQ_STAT_PDROP, /* u32 */ + TCA_GRED_VQ_STAT_OTHER, /* u32 */ + TCA_GRED_VQ_FLAGS, /* u32 */ + __TCA_GRED_VQ_MAX +}; + +#define TCA_GRED_VQ_MAX (__TCA_GRED_VQ_MAX - 1) + +struct tc_gred_qopt { + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + __u32 DP; /* up to 2^32 DPs */ + __u32 backlog; + __u32 qave; + __u32 forced; + __u32 early; + __u32 other; + __u32 pdrop; + __u8 Wlog; /* log(W) */ + __u8 Plog; /* log(P_max/(qth_max-qth_min)) */ + __u8 Scell_log; /* cell size for idle damping */ + __u8 prio; /* prio of this VQ */ + __u32 packets; + __u32 bytesin; +}; + +/* gred setup */ +struct tc_gred_sopt { + __u32 DPs; + __u32 def_DP; + __u8 grio; + __u8 flags; + __u16 pad1; +}; + +/* CHOKe section */ + +enum { + TCA_CHOKE_UNSPEC, + TCA_CHOKE_PARMS, + TCA_CHOKE_STAB, + TCA_CHOKE_MAX_P, + __TCA_CHOKE_MAX, +}; + +#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1) + +struct tc_choke_qopt { + __u32 limit; /* Hard queue length (packets) */ + __u32 qth_min; /* Min average threshold (packets) */ + __u32 qth_max; /* Max average threshold (packets) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; /* see RED flags */ +}; + +struct tc_choke_xstats { + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ + __u32 matched; /* Drops due to flow match */ +}; + +/* HTB section */ +#define TC_HTB_NUMPRIO 8 +#define TC_HTB_MAXDEPTH 8 +#define TC_HTB_PROTOVER 3 /* the same as HTB and TC's major */ + +struct tc_htb_opt { + struct tc_ratespec rate; + struct tc_ratespec ceil; + __u32 buffer; + __u32 cbuffer; + __u32 quantum; + __u32 level; /* out only */ + __u32 prio; +}; +struct tc_htb_glob { + __u32 version; /* to match HTB/TC */ + __u32 rate2quantum; /* bps->quantum divisor */ + __u32 defcls; /* default class number */ + __u32 debug; /* debug flags */ + + /* stats */ + __u32 direct_pkts; /* count of non shaped packets */ +}; +enum { + TCA_HTB_UNSPEC, + TCA_HTB_PARMS, + TCA_HTB_INIT, + TCA_HTB_CTAB, + TCA_HTB_RTAB, + TCA_HTB_DIRECT_QLEN, + TCA_HTB_RATE64, + TCA_HTB_CEIL64, + TCA_HTB_PAD, + __TCA_HTB_MAX, +}; + +#define TCA_HTB_MAX (__TCA_HTB_MAX - 1) + +struct tc_htb_xstats { + __u32 lends; + __u32 borrows; + __u32 giants; /* unused since 'Make HTB scheduler work with TSO.' */ + __s32 tokens; + __s32 ctokens; +}; + +/* HFSC section */ + +struct tc_hfsc_qopt { + __u16 defcls; /* default class */ +}; + +struct tc_service_curve { + __u32 m1; /* slope of the first segment in bps */ + __u32 d; /* x-projection of the first segment in us */ + __u32 m2; /* slope of the second segment in bps */ +}; + +struct tc_hfsc_stats { + __u64 work; /* total work done */ + __u64 rtwork; /* work done by real-time criteria */ + __u32 period; /* current period */ + __u32 level; /* class level in hierarchy */ +}; + +enum { + TCA_HFSC_UNSPEC, + TCA_HFSC_RSC, + TCA_HFSC_FSC, + TCA_HFSC_USC, + __TCA_HFSC_MAX, +}; + +#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1) + + +/* CBQ section */ + +#define TC_CBQ_MAXPRIO 8 +#define TC_CBQ_MAXLEVEL 8 +#define TC_CBQ_DEF_EWMA 5 + +struct tc_cbq_lssopt { + unsigned char change; + unsigned char flags; +#define TCF_CBQ_LSS_BOUNDED 1 +#define TCF_CBQ_LSS_ISOLATED 2 + unsigned char ewma_log; + unsigned char level; +#define TCF_CBQ_LSS_FLAGS 1 +#define TCF_CBQ_LSS_EWMA 2 +#define TCF_CBQ_LSS_MAXIDLE 4 +#define TCF_CBQ_LSS_MINIDLE 8 +#define TCF_CBQ_LSS_OFFTIME 0x10 +#define TCF_CBQ_LSS_AVPKT 0x20 + __u32 maxidle; + __u32 minidle; + __u32 offtime; + __u32 avpkt; +}; + +struct tc_cbq_wrropt { + unsigned char flags; + unsigned char priority; + unsigned char cpriority; + unsigned char __reserved; + __u32 allot; + __u32 weight; +}; + +struct tc_cbq_ovl { + unsigned char strategy; +#define TC_CBQ_OVL_CLASSIC 0 +#define TC_CBQ_OVL_DELAY 1 +#define TC_CBQ_OVL_LOWPRIO 2 +#define TC_CBQ_OVL_DROP 3 +#define TC_CBQ_OVL_RCLASSIC 4 + unsigned char priority2; + __u16 pad; + __u32 penalty; +}; + +struct tc_cbq_police { + unsigned char police; + unsigned char __res1; + unsigned short __res2; +}; + +struct tc_cbq_fopt { + __u32 split; + __u32 defmap; + __u32 defchange; +}; + +struct tc_cbq_xstats { + __u32 borrows; + __u32 overactions; + __s32 avgidle; + __s32 undertime; +}; + +enum { + TCA_CBQ_UNSPEC, + TCA_CBQ_LSSOPT, + TCA_CBQ_WRROPT, + TCA_CBQ_FOPT, + TCA_CBQ_OVL_STRATEGY, + TCA_CBQ_RATE, + TCA_CBQ_RTAB, + TCA_CBQ_POLICE, + __TCA_CBQ_MAX, +}; + +#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) + +/* dsmark section */ + +enum { + TCA_DSMARK_UNSPEC, + TCA_DSMARK_INDICES, + TCA_DSMARK_DEFAULT_INDEX, + TCA_DSMARK_SET_TC_INDEX, + TCA_DSMARK_MASK, + TCA_DSMARK_VALUE, + __TCA_DSMARK_MAX, +}; + +#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1) + +/* ATM section */ + +enum { + TCA_ATM_UNSPEC, + TCA_ATM_FD, /* file/socket descriptor */ + TCA_ATM_PTR, /* pointer to descriptor - later */ + TCA_ATM_HDR, /* LL header */ + TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ + TCA_ATM_ADDR, /* PVC address (for output only) */ + TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */ + __TCA_ATM_MAX, +}; + +#define TCA_ATM_MAX (__TCA_ATM_MAX - 1) + +/* Network emulator */ + +enum { + TCA_NETEM_UNSPEC, + TCA_NETEM_CORR, + TCA_NETEM_DELAY_DIST, + TCA_NETEM_REORDER, + TCA_NETEM_CORRUPT, + TCA_NETEM_LOSS, + TCA_NETEM_RATE, + TCA_NETEM_ECN, + TCA_NETEM_RATE64, + TCA_NETEM_PAD, + TCA_NETEM_LATENCY64, + TCA_NETEM_JITTER64, + TCA_NETEM_SLOT, + TCA_NETEM_SLOT_DIST, + __TCA_NETEM_MAX, +}; + +#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1) + +struct tc_netem_qopt { + __u32 latency; /* added delay (us) */ + __u32 limit; /* fifo limit (packets) */ + __u32 loss; /* random packet loss (0=none ~0=100%) */ + __u32 gap; /* re-ordering gap (0 for none) */ + __u32 duplicate; /* random packet dup (0=none ~0=100%) */ + __u32 jitter; /* random jitter in latency (us) */ +}; + +struct tc_netem_corr { + __u32 delay_corr; /* delay correlation */ + __u32 loss_corr; /* packet loss correlation */ + __u32 dup_corr; /* duplicate correlation */ +}; + +struct tc_netem_reorder { + __u32 probability; + __u32 correlation; +}; + +struct tc_netem_corrupt { + __u32 probability; + __u32 correlation; +}; + +struct tc_netem_rate { + __u32 rate; /* byte/s */ + __s32 packet_overhead; + __u32 cell_size; + __s32 cell_overhead; +}; + +struct tc_netem_slot { + __s64 min_delay; /* nsec */ + __s64 max_delay; + __s32 max_packets; + __s32 max_bytes; + __s64 dist_delay; /* nsec */ + __s64 dist_jitter; /* nsec */ +}; + +enum { + NETEM_LOSS_UNSPEC, + NETEM_LOSS_GI, /* General Intuitive - 4 state model */ + NETEM_LOSS_GE, /* Gilbert Elliot models */ + __NETEM_LOSS_MAX +}; +#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1) + +/* State transition probabilities for 4 state model */ +struct tc_netem_gimodel { + __u32 p13; + __u32 p31; + __u32 p32; + __u32 p14; + __u32 p23; +}; + +/* Gilbert-Elliot models */ +struct tc_netem_gemodel { + __u32 p; + __u32 r; + __u32 h; + __u32 k1; +}; + +#define NETEM_DIST_SCALE 8192 +#define NETEM_DIST_MAX 16384 + +/* DRR */ + +enum { + TCA_DRR_UNSPEC, + TCA_DRR_QUANTUM, + __TCA_DRR_MAX +}; + +#define TCA_DRR_MAX (__TCA_DRR_MAX - 1) + +struct tc_drr_stats { + __u32 deficit; +}; + +/* MQPRIO */ +#define TC_QOPT_BITMASK 15 +#define TC_QOPT_MAX_QUEUE 16 + +enum { + TC_MQPRIO_HW_OFFLOAD_NONE, /* no offload requested */ + TC_MQPRIO_HW_OFFLOAD_TCS, /* offload TCs, no queue counts */ + __TC_MQPRIO_HW_OFFLOAD_MAX +}; + +#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1) + +enum { + TC_MQPRIO_MODE_DCB, + TC_MQPRIO_MODE_CHANNEL, + __TC_MQPRIO_MODE_MAX +}; + +#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1) + +enum { + TC_MQPRIO_SHAPER_DCB, + TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */ + __TC_MQPRIO_SHAPER_MAX +}; + +#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1) + +struct tc_mqprio_qopt { + __u8 num_tc; + __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; + __u8 hw; + __u16 count[TC_QOPT_MAX_QUEUE]; + __u16 offset[TC_QOPT_MAX_QUEUE]; +}; + +#define TC_MQPRIO_F_MODE 0x1 +#define TC_MQPRIO_F_SHAPER 0x2 +#define TC_MQPRIO_F_MIN_RATE 0x4 +#define TC_MQPRIO_F_MAX_RATE 0x8 + +enum { + TCA_MQPRIO_UNSPEC, + TCA_MQPRIO_MODE, + TCA_MQPRIO_SHAPER, + TCA_MQPRIO_MIN_RATE64, + TCA_MQPRIO_MAX_RATE64, + __TCA_MQPRIO_MAX, +}; + +#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1) + +/* SFB */ + +enum { + TCA_SFB_UNSPEC, + TCA_SFB_PARMS, + __TCA_SFB_MAX, +}; + +#define TCA_SFB_MAX (__TCA_SFB_MAX - 1) + +/* + * Note: increment, decrement are Q0.16 fixed-point values. + */ +struct tc_sfb_qopt { + __u32 rehash_interval; /* delay between hash move, in ms */ + __u32 warmup_time; /* double buffering warmup time in ms (warmup_time < rehash_interval) */ + __u32 max; /* max len of qlen_min */ + __u32 bin_size; /* maximum queue length per bin */ + __u32 increment; /* probability increment, (d1 in Blue) */ + __u32 decrement; /* probability decrement, (d2 in Blue) */ + __u32 limit; /* max SFB queue length */ + __u32 penalty_rate; /* inelastic flows are rate limited to 'rate' pps */ + __u32 penalty_burst; +}; + +struct tc_sfb_xstats { + __u32 earlydrop; + __u32 penaltydrop; + __u32 bucketdrop; + __u32 queuedrop; + __u32 childdrop; /* drops in child qdisc */ + __u32 marked; + __u32 maxqlen; + __u32 maxprob; + __u32 avgprob; +}; + +#define SFB_MAX_PROB 0xFFFF + +/* QFQ */ +enum { + TCA_QFQ_UNSPEC, + TCA_QFQ_WEIGHT, + TCA_QFQ_LMAX, + __TCA_QFQ_MAX +}; + +#define TCA_QFQ_MAX (__TCA_QFQ_MAX - 1) + +struct tc_qfq_stats { + __u32 weight; + __u32 lmax; +}; + +/* CODEL */ + +enum { + TCA_CODEL_UNSPEC, + TCA_CODEL_TARGET, + TCA_CODEL_LIMIT, + TCA_CODEL_INTERVAL, + TCA_CODEL_ECN, + TCA_CODEL_CE_THRESHOLD, + __TCA_CODEL_MAX +}; + +#define TCA_CODEL_MAX (__TCA_CODEL_MAX - 1) + +struct tc_codel_xstats { + __u32 maxpacket; /* largest packet we've seen so far */ + __u32 count; /* how many drops we've done since the last time we + * entered dropping state + */ + __u32 lastcount; /* count at entry to dropping state */ + __u32 ldelay; /* in-queue delay seen by most recently dequeued packet */ + __s32 drop_next; /* time to drop next packet */ + __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */ + __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */ + __u32 dropping; /* are we in dropping state ? */ + __u32 ce_mark; /* number of CE marked packets because of ce_threshold */ +}; + +/* FQ_CODEL */ + +enum { + TCA_FQ_CODEL_UNSPEC, + TCA_FQ_CODEL_TARGET, + TCA_FQ_CODEL_LIMIT, + TCA_FQ_CODEL_INTERVAL, + TCA_FQ_CODEL_ECN, + TCA_FQ_CODEL_FLOWS, + TCA_FQ_CODEL_QUANTUM, + TCA_FQ_CODEL_CE_THRESHOLD, + TCA_FQ_CODEL_DROP_BATCH_SIZE, + TCA_FQ_CODEL_MEMORY_LIMIT, + __TCA_FQ_CODEL_MAX +}; + +#define TCA_FQ_CODEL_MAX (__TCA_FQ_CODEL_MAX - 1) + +enum { + TCA_FQ_CODEL_XSTATS_QDISC, + TCA_FQ_CODEL_XSTATS_CLASS, +}; + +struct tc_fq_codel_qd_stats { + __u32 maxpacket; /* largest packet we've seen so far */ + __u32 drop_overlimit; /* number of time max qdisc + * packet limit was hit + */ + __u32 ecn_mark; /* number of packets we ECN marked + * instead of being dropped + */ + __u32 new_flow_count; /* number of time packets + * created a 'new flow' + */ + __u32 new_flows_len; /* count of flows in new list */ + __u32 old_flows_len; /* count of flows in old list */ + __u32 ce_mark; /* packets above ce_threshold */ + __u32 memory_usage; /* in bytes */ + __u32 drop_overmemory; +}; + +struct tc_fq_codel_cl_stats { + __s32 deficit; + __u32 ldelay; /* in-queue delay seen by most recently + * dequeued packet + */ + __u32 count; + __u32 lastcount; + __u32 dropping; + __s32 drop_next; +}; + +struct tc_fq_codel_xstats { + __u32 type; + union { + struct tc_fq_codel_qd_stats qdisc_stats; + struct tc_fq_codel_cl_stats class_stats; + }; +}; + +/* FQ */ + +enum { + TCA_FQ_UNSPEC, + + TCA_FQ_PLIMIT, /* limit of total number of packets in queue */ + + TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */ + + TCA_FQ_QUANTUM, /* RR quantum */ + + TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */ + + TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ + + TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */ + + TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ + + TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ + + TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ + + TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ + + TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ + + TCA_FQ_CE_THRESHOLD, /* DCTCP-like CE-marking threshold */ + + __TCA_FQ_MAX +}; + +#define TCA_FQ_MAX (__TCA_FQ_MAX - 1) + +struct tc_fq_qd_stats { + __u64 gc_flows; + __u64 highprio_packets; + __u64 tcp_retrans; + __u64 throttled; + __u64 flows_plimit; + __u64 pkts_too_long; + __u64 allocation_errors; + __s64 time_next_delayed_flow; + __u32 flows; + __u32 inactive_flows; + __u32 throttled_flows; + __u32 unthrottle_latency_ns; + __u64 ce_mark; /* packets above ce_threshold */ +}; + +/* Heavy-Hitter Filter */ + +enum { + TCA_HHF_UNSPEC, + TCA_HHF_BACKLOG_LIMIT, + TCA_HHF_QUANTUM, + TCA_HHF_HH_FLOWS_LIMIT, + TCA_HHF_RESET_TIMEOUT, + TCA_HHF_ADMIT_BYTES, + TCA_HHF_EVICT_TIMEOUT, + TCA_HHF_NON_HH_WEIGHT, + __TCA_HHF_MAX +}; + +#define TCA_HHF_MAX (__TCA_HHF_MAX - 1) + +struct tc_hhf_xstats { + __u32 drop_overlimit; /* number of times max qdisc packet limit + * was hit + */ + __u32 hh_overlimit; /* number of times max heavy-hitters was hit */ + __u32 hh_tot_count; /* number of captured heavy-hitters so far */ + __u32 hh_cur_count; /* number of current heavy-hitters */ +}; + +/* PIE */ +enum { + TCA_PIE_UNSPEC, + TCA_PIE_TARGET, + TCA_PIE_LIMIT, + TCA_PIE_TUPDATE, + TCA_PIE_ALPHA, + TCA_PIE_BETA, + TCA_PIE_ECN, + TCA_PIE_BYTEMODE, + __TCA_PIE_MAX +}; +#define TCA_PIE_MAX (__TCA_PIE_MAX - 1) + +struct tc_pie_xstats { + __u32 prob; /* current probability */ + __u32 delay; /* current delay in ms */ + __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */ + __u32 packets_in; /* total number of packets enqueued */ + __u32 dropped; /* packets dropped due to pie_action */ + __u32 overlimit; /* dropped due to lack of space in queue */ + __u32 maxq; /* maximum queue size */ + __u32 ecn_mark; /* packets marked with ecn*/ +}; + +/* CBS */ +struct tc_cbs_qopt { + __u8 offload; + __u8 _pad[3]; + __s32 hicredit; + __s32 locredit; + __s32 idleslope; + __s32 sendslope; +}; + +enum { + TCA_CBS_UNSPEC, + TCA_CBS_PARMS, + __TCA_CBS_MAX, +}; + +#define TCA_CBS_MAX (__TCA_CBS_MAX - 1) + + +/* ETF */ +struct tc_etf_qopt { + __s32 delta; + __s32 clockid; + __u32 flags; +#define TC_ETF_DEADLINE_MODE_ON BIT(0) +#define TC_ETF_OFFLOAD_ON BIT(1) +}; + +enum { + TCA_ETF_UNSPEC, + TCA_ETF_PARMS, + __TCA_ETF_MAX, +}; + +#define TCA_ETF_MAX (__TCA_ETF_MAX - 1) + + +/* CAKE */ +enum { + TCA_CAKE_UNSPEC, + TCA_CAKE_PAD, + TCA_CAKE_BASE_RATE64, + TCA_CAKE_DIFFSERV_MODE, + TCA_CAKE_ATM, + TCA_CAKE_FLOW_MODE, + TCA_CAKE_OVERHEAD, + TCA_CAKE_RTT, + TCA_CAKE_TARGET, + TCA_CAKE_AUTORATE, + TCA_CAKE_MEMORY, + TCA_CAKE_NAT, + TCA_CAKE_RAW, + TCA_CAKE_WASH, + TCA_CAKE_MPU, + TCA_CAKE_INGRESS, + TCA_CAKE_ACK_FILTER, + TCA_CAKE_SPLIT_GSO, + __TCA_CAKE_MAX +}; +#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) + +enum { + __TCA_CAKE_STATS_INVALID, + TCA_CAKE_STATS_PAD, + TCA_CAKE_STATS_CAPACITY_ESTIMATE64, + TCA_CAKE_STATS_MEMORY_LIMIT, + TCA_CAKE_STATS_MEMORY_USED, + TCA_CAKE_STATS_AVG_NETOFF, + TCA_CAKE_STATS_MIN_NETLEN, + TCA_CAKE_STATS_MAX_NETLEN, + TCA_CAKE_STATS_MIN_ADJLEN, + TCA_CAKE_STATS_MAX_ADJLEN, + TCA_CAKE_STATS_TIN_STATS, + TCA_CAKE_STATS_DEFICIT, + TCA_CAKE_STATS_COBALT_COUNT, + TCA_CAKE_STATS_DROPPING, + TCA_CAKE_STATS_DROP_NEXT_US, + TCA_CAKE_STATS_P_DROP, + TCA_CAKE_STATS_BLUE_TIMER_US, + __TCA_CAKE_STATS_MAX +}; +#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1) + +enum { + __TCA_CAKE_TIN_STATS_INVALID, + TCA_CAKE_TIN_STATS_PAD, + TCA_CAKE_TIN_STATS_SENT_PACKETS, + TCA_CAKE_TIN_STATS_SENT_BYTES64, + TCA_CAKE_TIN_STATS_DROPPED_PACKETS, + TCA_CAKE_TIN_STATS_DROPPED_BYTES64, + TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS, + TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64, + TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS, + TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64, + TCA_CAKE_TIN_STATS_BACKLOG_PACKETS, + TCA_CAKE_TIN_STATS_BACKLOG_BYTES, + TCA_CAKE_TIN_STATS_THRESHOLD_RATE64, + TCA_CAKE_TIN_STATS_TARGET_US, + TCA_CAKE_TIN_STATS_INTERVAL_US, + TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS, + TCA_CAKE_TIN_STATS_WAY_MISSES, + TCA_CAKE_TIN_STATS_WAY_COLLISIONS, + TCA_CAKE_TIN_STATS_PEAK_DELAY_US, + TCA_CAKE_TIN_STATS_AVG_DELAY_US, + TCA_CAKE_TIN_STATS_BASE_DELAY_US, + TCA_CAKE_TIN_STATS_SPARSE_FLOWS, + TCA_CAKE_TIN_STATS_BULK_FLOWS, + TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS, + TCA_CAKE_TIN_STATS_MAX_SKBLEN, + TCA_CAKE_TIN_STATS_FLOW_QUANTUM, + __TCA_CAKE_TIN_STATS_MAX +}; +#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1) +#define TC_CAKE_MAX_TINS (8) + +enum { + CAKE_FLOW_NONE = 0, + CAKE_FLOW_SRC_IP, + CAKE_FLOW_DST_IP, + CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */ + CAKE_FLOW_FLOWS, + CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */ + CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */ + CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */ + CAKE_FLOW_MAX, +}; + +enum { + CAKE_DIFFSERV_DIFFSERV3 = 0, + CAKE_DIFFSERV_DIFFSERV4, + CAKE_DIFFSERV_DIFFSERV8, + CAKE_DIFFSERV_BESTEFFORT, + CAKE_DIFFSERV_PRECEDENCE, + CAKE_DIFFSERV_MAX +}; + +enum { + CAKE_ACK_NONE = 0, + CAKE_ACK_FILTER, + CAKE_ACK_AGGRESSIVE, + CAKE_ACK_MAX +}; + +enum { + CAKE_ATM_NONE = 0, + CAKE_ATM_ATM, + CAKE_ATM_PTM, + CAKE_ATM_MAX +}; + + +/* TAPRIO */ +enum { + TC_TAPRIO_CMD_SET_GATES = 0x00, + TC_TAPRIO_CMD_SET_AND_HOLD = 0x01, + TC_TAPRIO_CMD_SET_AND_RELEASE = 0x02, +}; + +enum { + TCA_TAPRIO_SCHED_ENTRY_UNSPEC, + TCA_TAPRIO_SCHED_ENTRY_INDEX, /* u32 */ + TCA_TAPRIO_SCHED_ENTRY_CMD, /* u8 */ + TCA_TAPRIO_SCHED_ENTRY_GATE_MASK, /* u32 */ + TCA_TAPRIO_SCHED_ENTRY_INTERVAL, /* u32 */ + __TCA_TAPRIO_SCHED_ENTRY_MAX, +}; +#define TCA_TAPRIO_SCHED_ENTRY_MAX (__TCA_TAPRIO_SCHED_ENTRY_MAX - 1) + +/* The format for schedule entry list is: + * [TCA_TAPRIO_SCHED_ENTRY_LIST] + * [TCA_TAPRIO_SCHED_ENTRY] + * [TCA_TAPRIO_SCHED_ENTRY_CMD] + * [TCA_TAPRIO_SCHED_ENTRY_GATES] + * [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] + */ +enum { + TCA_TAPRIO_SCHED_UNSPEC, + TCA_TAPRIO_SCHED_ENTRY, + __TCA_TAPRIO_SCHED_MAX, +}; + +#define TCA_TAPRIO_SCHED_MAX (__TCA_TAPRIO_SCHED_MAX - 1) + +enum { + TCA_TAPRIO_ATTR_UNSPEC, + TCA_TAPRIO_ATTR_PRIOMAP, /* struct tc_mqprio_qopt */ + TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST, /* nested of entry */ + TCA_TAPRIO_ATTR_SCHED_BASE_TIME, /* s64 */ + TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY, /* single entry */ + TCA_TAPRIO_ATTR_SCHED_CLOCKID, /* s32 */ + TCA_TAPRIO_PAD, + __TCA_TAPRIO_ATTR_MAX, +}; + +#define TCA_TAPRIO_ATTR_MAX (__TCA_TAPRIO_ATTR_MAX - 1) + +#endif diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 197b40f5b5c6..bfd9bfc82c3b 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o libbpf_probes.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 34d9c3619c96..847916273696 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -14,21 +14,6 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) #$(info Determined 'srctree' to be $(srctree)) endif -# Makefiles suck: This macro sets a default value of $(2) for the -# variable named by $(1), unless the variable has been set by -# environment or command line. This is necessary for CC and AR -# because make sets default values, so the simpler ?= approach -# won't work as expected. -define allow-override - $(if $(or $(findstring environment,$(origin $(1))),\ - $(findstring command line,$(origin $(1)))),,\ - $(eval $(1) = $(2))) -endef - -# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. -$(call allow-override,CC,$(CROSS_COMPILE)gcc) -$(call allow-override,AR,$(CROSS_COMPILE)ar) - INSTALL = install # Use DESTDIR for installing into a different root directory. @@ -54,7 +39,7 @@ man_dir_SQ = '$(subst ','\'',$(man_dir))' export man_dir man_dir_SQ INSTALL export DESTDIR DESTDIR_SQ -include ../../scripts/Makefile.include +include $(srctree)/tools/scripts/Makefile.include # copy a bit from Linux kbuild diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 3caaa3428774..88cbd110ae58 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -65,6 +65,17 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } +static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) +{ + int fd; + + do { + fd = sys_bpf(BPF_PROG_LOAD, attr, size); + } while (fd < 0 && errno == EAGAIN); + + return fd; +} + int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0; @@ -232,7 +243,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, memcpy(attr.prog_name, load_attr->name, min(name_len, BPF_OBJ_NAME_LEN - 1)); - fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) return fd; @@ -269,7 +280,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, break; } - fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) goto done; @@ -283,7 +294,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.log_size = log_buf_sz; attr.log_level = 1; log_buf[0] = 0; - fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); done: free(finfo); free(linfo); @@ -328,7 +339,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.kern_version = kern_version; attr.prog_flags = prog_flags; - return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + return sys_bpf_prog_load(&attr, sizeof(attr)); } int bpf_map_update_elem(int fd, const void *key, const void *value, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 169e347c76f6..2ccde17957e6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2667,9 +2667,38 @@ static const struct { #undef BPF_EAPROG_SEC #undef BPF_APROG_COMPAT +#define MAX_TYPE_NAME_SIZE 32 + +static char *libbpf_get_type_names(bool attach_type) +{ + int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE; + char *buf; + + buf = malloc(len); + if (!buf) + return NULL; + + buf[0] = '\0'; + /* Forge string buf with all available names */ + for (i = 0; i < ARRAY_SIZE(section_names); i++) { + if (attach_type && !section_names[i].is_attachable) + continue; + + if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) { + free(buf); + return NULL; + } + strcat(buf, " "); + strcat(buf, section_names[i].sec); + } + + return buf; +} + int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, enum bpf_attach_type *expected_attach_type) { + char *type_names; int i; if (!name) @@ -2682,12 +2711,20 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, *expected_attach_type = section_names[i].expected_attach_type; return 0; } + pr_warning("failed to guess program type based on ELF section name '%s'\n", name); + type_names = libbpf_get_type_names(false); + if (type_names != NULL) { + pr_info("supported section(type) names are:%s\n", type_names); + free(type_names); + } + return -EINVAL; } int libbpf_attach_type_by_name(const char *name, enum bpf_attach_type *attach_type) { + char *type_names; int i; if (!name) @@ -2701,6 +2738,13 @@ int libbpf_attach_type_by_name(const char *name, *attach_type = section_names[i].attach_type; return 0; } + pr_warning("failed to guess attach type based on ELF section name '%s'\n", name); + type_names = libbpf_get_type_names(true); + if (type_names != NULL) { + pr_info("attachable section(type) names are:%s\n", type_names); + free(type_names); + } + return -EINVAL; } @@ -2907,8 +2951,6 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, err = bpf_program__identify_section(prog, &prog_type, &expected_attach_type); if (err < 0) { - pr_warning("failed to guess program type based on section name %s\n", - prog->section_name); bpf_object__close(obj); return -EINVAL; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 5f68d7b75215..62ae6cb93da1 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -355,6 +355,20 @@ LIBBPF_API const struct bpf_line_info * bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, __u32 insn_off, __u32 nr_skip); +/* + * Probe for supported system features + * + * Note that running many of these probes in a short amount of time can cause + * the kernel to reach the maximal size of lockable memory allowed for the + * user, causing subsequent probes to fail. In this case, the caller may want + * to adjust that limit with setrlimit(). + */ +LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, + __u32 ifindex); +LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); +LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, + enum bpf_prog_type prog_type, __u32 ifindex); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index cd02cd4e2cc3..266bc95d0142 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -124,3 +124,10 @@ LIBBPF_0.0.1 { local: *; }; + +LIBBPF_0.0.2 { + global: + bpf_probe_helper; + bpf_probe_map_type; + bpf_probe_prog_type; +} LIBBPF_0.0.1; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c new file mode 100644 index 000000000000..8c3a1c04dcb2 --- /dev/null +++ b/tools/lib/bpf/libbpf_probes.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2019 Netronome Systems, Inc. */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "bpf.h" +#include "libbpf.h" + +static bool grep(const char *buffer, const char *pattern) +{ + return !!strstr(buffer, pattern); +} + +static int get_vendor_id(int ifindex) +{ + char ifname[IF_NAMESIZE], path[64], buf[8]; + ssize_t len; + int fd; + + if (!if_indextoname(ifindex, ifname)) + return -1; + + snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname); + + fd = open(path, O_RDONLY); + if (fd < 0) + return -1; + + len = read(fd, buf, sizeof(buf)); + close(fd); + if (len < 0) + return -1; + if (len >= (ssize_t)sizeof(buf)) + return -1; + buf[len] = '\0'; + + return strtol(buf, NULL, 0); +} + +static int get_kernel_version(void) +{ + int version, subversion, patchlevel; + struct utsname utsn; + + /* Return 0 on failure, and attempt to probe with empty kversion */ + if (uname(&utsn)) + return 0; + + if (sscanf(utsn.release, "%d.%d.%d", + &version, &subversion, &patchlevel) != 3) + return 0; + + return (version << 16) + (subversion << 8) + patchlevel; +} + +static void +probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, + size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex) +{ + struct bpf_load_program_attr xattr = {}; + int fd; + + switch (prog_type) { + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: + xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; + break; + case BPF_PROG_TYPE_KPROBE: + xattr.kern_version = get_kernel_version(); + break; + case BPF_PROG_TYPE_UNSPEC: + case BPF_PROG_TYPE_SOCKET_FILTER: + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_XDP: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_LWT_IN: + case BPF_PROG_TYPE_LWT_OUT: + case BPF_PROG_TYPE_LWT_XMIT: + case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_SK_SKB: + case BPF_PROG_TYPE_CGROUP_DEVICE: + case BPF_PROG_TYPE_SK_MSG: + case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_LWT_SEG6LOCAL: + case BPF_PROG_TYPE_LIRC_MODE2: + case BPF_PROG_TYPE_SK_REUSEPORT: + case BPF_PROG_TYPE_FLOW_DISSECTOR: + default: + break; + } + + xattr.prog_type = prog_type; + xattr.insns = insns; + xattr.insns_cnt = insns_cnt; + xattr.license = "GPL"; + xattr.prog_ifindex = ifindex; + + fd = bpf_load_program_xattr(&xattr, buf, buf_len); + if (fd >= 0) + close(fd); +} + +bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) +{ + struct bpf_insn insns[2] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN() + }; + + if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS) + /* nfp returns -EINVAL on exit(0) with TC offload */ + insns[0].imm = 2; + + errno = 0; + probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); + + return errno != EINVAL && errno != EOPNOTSUPP; +} + +bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) +{ + int key_size, value_size, max_entries, map_flags; + struct bpf_create_map_attr attr = {}; + int fd = -1, fd_inner; + + key_size = sizeof(__u32); + value_size = sizeof(__u32); + max_entries = 1; + map_flags = 0; + + switch (map_type) { + case BPF_MAP_TYPE_STACK_TRACE: + value_size = sizeof(__u64); + break; + case BPF_MAP_TYPE_LPM_TRIE: + key_size = sizeof(__u64); + value_size = sizeof(__u64); + map_flags = BPF_F_NO_PREALLOC; + break; + case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: + key_size = sizeof(struct bpf_cgroup_storage_key); + value_size = sizeof(__u64); + max_entries = 0; + break; + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + key_size = 0; + break; + case BPF_MAP_TYPE_UNSPEC: + case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PROG_ARRAY: + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_LRU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_CPUMAP: + case BPF_MAP_TYPE_XSKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + default: + break; + } + + if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { + /* TODO: probe for device, once libbpf has a function to create + * map-in-map for offload + */ + if (ifindex) + return false; + + fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH, + sizeof(__u32), sizeof(__u32), 1, 0); + if (fd_inner < 0) + return false; + fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32), + fd_inner, 1, 0); + close(fd_inner); + } else { + /* Note: No other restriction on map type probes for offload */ + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; + attr.map_flags = map_flags; + attr.map_ifindex = ifindex; + + fd = bpf_create_map_xattr(&attr); + } + if (fd >= 0) + close(fd); + + return fd >= 0; +} + +bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, + __u32 ifindex) +{ + struct bpf_insn insns[2] = { + BPF_EMIT_CALL(id), + BPF_EXIT_INSN() + }; + char buf[4096] = {}; + bool res; + + probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), + ifindex); + res = !grep(buf, "invalid func ") && !grep(buf, "unknown func "); + + if (ifindex) { + switch (get_vendor_id(ifindex)) { + case 0x19ee: /* Netronome specific */ + res = res && !grep(buf, "not supported by FW") && + !grep(buf, "unsupported function id"); + break; + default: + break; + } + } + + return res; +} diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index 1076393e6f43..e18a3556f5e3 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -63,7 +63,8 @@ static const char *reg_names[] = { [PERF_REG_POWERPC_TRAP] = "trap", [PERF_REG_POWERPC_DAR] = "dar", [PERF_REG_POWERPC_DSISR] = "dsisr", - [PERF_REG_POWERPC_SIER] = "sier" + [PERF_REG_POWERPC_SIER] = "sier", + [PERF_REG_POWERPC_MMCRA] = "mmcra" }; static inline const char *perf_reg_name(int id) diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index 07fcd977d93e..34d5134681d9 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -53,6 +53,7 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(dar, PERF_REG_POWERPC_DAR), SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR), SMPL_REG(sier, PERF_REG_POWERPC_SIER), + SMPL_REG(mmcra, PERF_REG_POWERPC_MMCRA), SMPL_REG_END }; diff --git a/tools/testing/nvdimm/dimm_devs.c b/tools/testing/nvdimm/dimm_devs.c index e75238404555..2d4baf57822f 100644 --- a/tools/testing/nvdimm/dimm_devs.c +++ b/tools/testing/nvdimm/dimm_devs.c @@ -18,8 +18,8 @@ ssize_t security_show(struct device *dev, * For the test version we need to poll the "hardware" in order * to get the updated status for unlock testing. */ - nvdimm->sec.state = nvdimm_security_state(nvdimm, false); - nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, true); + nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER); switch (nvdimm->sec.state) { case NVDIMM_SECURITY_DISABLED: diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 70229de510f5..8993e9c8f410 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -10,15 +10,14 @@ ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR endif +CLANG ?= clang +LLC ?= llc +LLVM_OBJCOPY ?= llvm-objcopy +LLVM_READELF ?= llvm-readelf +BTF_PAHOLE ?= pahole CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include LDLIBS += -lcap -lelf -lrt -lpthread -TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read -all: $(TEST_CUSTOM_PROGS) - -$(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c - $(CC) -o $(TEST_CUSTOM_PROGS) -static $< -Wl,--build-id - # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ @@ -26,21 +25,42 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ test_netcnt test_tcpnotify_user -TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ - test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ - test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ - test_tcpnotify_kern.o \ - sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ - sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ - test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ - test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ - test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ +BPF_OBJ_FILES = \ + test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ + sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o \ + test_tcpnotify_kern.o sample_map_ret0.o test_tcpbpf_kern.o \ + sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o \ + test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o \ + test_tunnel_kern.o test_sockhash_kern.o test_lwt_seg6local.o \ + sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ - test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \ - test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o \ + test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_xdp_vlan.o \ xdp_dummy.o test_map_in_map.o +# Objects are built with default compilation flags and with sub-register +# code-gen enabled. +BPF_OBJ_FILES_DUAL_COMPILE = \ + test_pkt_access.o test_pkt_access.o test_xdp.o test_adjust_tail.o \ + test_l4lb.o test_l4lb_noinline.o test_xdp_noinline.o test_tcp_estats.o \ + test_obj_id.o test_pkt_md_access.o test_tracepoint.o \ + test_stacktrace_map.o test_stacktrace_map.o test_stacktrace_build_id.o \ + test_stacktrace_build_id.o test_get_stack_rawtp.o \ + test_get_stack_rawtp.o test_tracepoint.o test_sk_lookup_kern.o \ + test_queue_map.o test_stack_map.o + +TEST_GEN_FILES = $(BPF_OBJ_FILES) $(BPF_OBJ_FILES_DUAL_COMPILE) + +# Also test sub-register code-gen if LLVM + kernel both has eBPF v3 processor +# support which is the first version to contain both ALU32 and JMP32 +# instructions. +SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \ + $(CLANG) -target bpf -O2 -emit-llvm -S -x c - -o - | \ + $(LLC) -mattr=+alu32 -mcpu=probe 2>&1 | \ + grep 'if w') +ifneq ($(SUBREG_CODEGEN),) +TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES_DUAL_COMPILE)) +endif + # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ test_libbpf.sh \ @@ -56,6 +76,7 @@ TEST_PROGS := test_kmod.sh \ test_xdp_vlan.sh TEST_PROGS_EXTENDED := with_addr.sh \ + with_tunnels.sh \ tcp_client.py \ tcp_server.py @@ -65,6 +86,13 @@ TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_use include ../lib.mk +# NOTE: $(OUTPUT) won't get default value if used before lib.mk +TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read +all: $(TEST_CUSTOM_PROGS) + +$(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c + $(CC) -o $@ -static $< -Wl,--build-id + BPFOBJ := $(OUTPUT)/libbpf.a $(TEST_GEN_PROGS): $(BPFOBJ) @@ -92,11 +120,6 @@ force: $(BPFOBJ): force $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ -CLANG ?= clang -LLC ?= llc -LLVM_OBJCOPY ?= llvm-objcopy -BTF_PAHOLE ?= pahole - PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) # Let newer LLVM versions transparently probe the kernel for availability @@ -126,12 +149,15 @@ $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline $(OUTPUT)/test_queue_map.o: test_queue_stack_map.h $(OUTPUT)/test_stack_map.o: test_queue_stack_map.h +$(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h +$(OUTPUT)/test_progs.o: flow_dissector_load.h + BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ - readelf -S ./llvm_btf_verify.o | grep BTF; \ + $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \ /bin/rm -f ./llvm_btf_verify.o) ifneq ($(BTF_LLVM_PROBE),) @@ -148,6 +174,30 @@ endif endif endif +ifneq ($(SUBREG_CODEGEN),) +ALU32_BUILD_DIR = $(OUTPUT)/alu32 +TEST_CUSTOM_PROGS += $(ALU32_BUILD_DIR)/test_progs_32 +$(ALU32_BUILD_DIR): + mkdir -p $@ + +$(ALU32_BUILD_DIR)/urandom_read: $(OUTPUT)/urandom_read + cp $< $@ + +$(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(ALU32_BUILD_DIR) \ + $(ALU32_BUILD_DIR)/urandom_read + $(CC) $(CFLAGS) -o $(ALU32_BUILD_DIR)/test_progs_32 $< \ + trace_helpers.c $(OUTPUT)/libbpf.a $(LDLIBS) + +$(ALU32_BUILD_DIR)/%.o: %.c $(ALU32_BUILD_DIR) $(ALU32_BUILD_DIR)/test_progs_32 + $(CLANG) $(CLANG_FLAGS) \ + -O2 -target bpf -emit-llvm -c $< -o - | \ + $(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \ + -filetype=obj -o $@ +ifeq ($(DWARF2BTF),y) + $(BTF_PAHOLE) -J $@ +endif +endif + # Have one program compiled without "-target bpf" to test whether libbpf loads # it successfully $(OUTPUT)/test_xdp.o: test_xdp.c @@ -166,4 +216,17 @@ ifeq ($(DWARF2BTF),y) $(BTF_PAHOLE) -J $@ endif -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) +$(OUTPUT)/test_verifier: $(OUTPUT)/verifier/tests.h +$(OUTPUT)/test_verifier: CFLAGS += -I$(OUTPUT) + +VERIFIER_TEST_FILES := $(wildcard verifier/*.c) +$(OUTPUT)/verifier/tests.h: $(VERIFIER_TEST_FILES) + $(shell ( cd verifier/ + echo '/* Generated header, do not edit */'; \ + echo '#ifdef FILL_ARRAY'; \ + ls *.c 2> /dev/null | \ + sed -e 's@\(.*\)@#include \"\1\"@'; \ + echo '#endif' \ + ) > $(OUTPUT)/verifier/tests.h) + +EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c index ae8180b11d5f..77cafa66d048 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.c +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -12,6 +12,7 @@ #include #include "bpf_rlimit.h" +#include "flow_dissector_load.h" const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; const char *cfg_map_name = "jmp_table"; @@ -21,46 +22,13 @@ char *cfg_path_name; static void load_and_attach_program(void) { - struct bpf_program *prog, *main_prog; - struct bpf_map *prog_array; - int i, fd, prog_fd, ret; + int prog_fd, ret; struct bpf_object *obj; - int prog_array_fd; - ret = bpf_prog_load(cfg_path_name, BPF_PROG_TYPE_FLOW_DISSECTOR, &obj, - &prog_fd); + ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name, + cfg_map_name, &prog_fd); if (ret) - error(1, 0, "bpf_prog_load %s", cfg_path_name); - - main_prog = bpf_object__find_program_by_title(obj, cfg_section_name); - if (!main_prog) - error(1, 0, "bpf_object__find_program_by_title %s", - cfg_section_name); - - prog_fd = bpf_program__fd(main_prog); - if (prog_fd < 0) - error(1, 0, "bpf_program__fd"); - - prog_array = bpf_object__find_map_by_name(obj, cfg_map_name); - if (!prog_array) - error(1, 0, "bpf_object__find_map_by_name %s", cfg_map_name); - - prog_array_fd = bpf_map__fd(prog_array); - if (prog_array_fd < 0) - error(1, 0, "bpf_map__fd %s", cfg_map_name); - - i = 0; - bpf_object__for_each_program(prog, obj) { - fd = bpf_program__fd(prog); - if (fd < 0) - error(1, 0, "bpf_program__fd"); - - if (fd != prog_fd) { - printf("%d: %s\n", i, bpf_program__title(prog, false)); - bpf_map_update_elem(prog_array_fd, &i, &fd, BPF_ANY); - ++i; - } - } + error(1, 0, "bpf_flow_load %s", cfg_path_name); ret = bpf_prog_attach(prog_fd, 0 /* Ignore */, BPF_FLOW_DISSECTOR, 0); if (ret) @@ -69,7 +37,6 @@ static void load_and_attach_program(void) ret = bpf_object__pin(obj, cfg_pin_path); if (ret) error(1, 0, "bpf_object__pin %s", cfg_pin_path); - } static void detach_program(void) diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h new file mode 100644 index 000000000000..41dd6959feb0 --- /dev/null +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +#ifndef FLOW_DISSECTOR_LOAD +#define FLOW_DISSECTOR_LOAD + +#include +#include + +static inline int bpf_flow_load(struct bpf_object **obj, + const char *path, + const char *section_name, + const char *map_name, + int *prog_fd) +{ + struct bpf_program *prog, *main_prog; + struct bpf_map *prog_array; + int prog_array_fd; + int ret, fd, i; + + ret = bpf_prog_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj, + prog_fd); + if (ret) + return ret; + + main_prog = bpf_object__find_program_by_title(*obj, section_name); + if (!main_prog) + return ret; + + *prog_fd = bpf_program__fd(main_prog); + if (*prog_fd < 0) + return ret; + + prog_array = bpf_object__find_map_by_name(*obj, map_name); + if (!prog_array) + return ret; + + prog_array_fd = bpf_map__fd(prog_array); + if (prog_array_fd < 0) + return ret; + + i = 0; + bpf_object__for_each_program(prog, *obj) { + fd = bpf_program__fd(prog); + if (fd < 0) + return fd; + + if (fd != *prog_fd) { + bpf_map_update_elem(prog_array_fd, &i, &fd, BPF_ANY); + ++i; + } + } + + return 0; +} + +#endif /* FLOW_DISSECTOR_LOAD */ diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index a0bd04befe87..179f1d8ec5bf 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -134,6 +135,12 @@ static struct btf_header hdr_tmpl = { .hdr_len = sizeof(struct btf_header), }; +/* several different mapv kinds(types) supported by pprint */ +enum pprint_mapv_kind_t { + PPRINT_MAPV_KIND_BASIC = 0, + PPRINT_MAPV_KIND_INT128, +}; + struct btf_raw_test { const char *descr; const char *str_sec; @@ -156,6 +163,7 @@ struct btf_raw_test { int type_off_delta; int str_off_delta; int str_len_delta; + enum pprint_mapv_kind_t mapv_kind; }; #define BTF_STR_SEC(str) \ @@ -2707,6 +2715,99 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid member offset", }, +{ + .descr = "128-bit int", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "int_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct, 128-bit int member", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct, 120-bit int member bitfield", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 120, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct, kind_flag, 128-bit int member", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct, kind_flag, 120-bit int member bitfield", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(120, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + }; /* struct btf_raw_test raw_tests[] */ static const char *get_next_str(const char *start, const char *end) @@ -3530,6 +3631,16 @@ struct pprint_mapv { uint32_t bits2c:2; }; +#ifdef __SIZEOF_INT128__ +struct pprint_mapv_int128 { + __int128 si128a; + __int128 si128b; + unsigned __int128 bits3:3; + unsigned __int128 bits80:80; + unsigned __int128 ui128; +}; +#endif + static struct btf_raw_test pprint_test_template[] = { { .raw_types = { @@ -3721,6 +3832,35 @@ static struct btf_raw_test pprint_test_template[] = { .max_entries = 128 * 1024, }, +#ifdef __SIZEOF_INT128__ +{ + /* test int128 */ + .raw_types = { + /* unsigned int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* __int128 */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 128, 16), + /* unsigned __int128 */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 128, 16), + /* struct pprint_mapv_int128 */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 5), 64), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), /* si128a */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 128)), /* si128b */ + BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(3, 256)), /* bits3 */ + BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(80, 259)), /* bits80 */ + BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(0, 384)), /* ui128 */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned int\0__int128\0unsigned __int128\0pprint_mapv_int128\0si128a\0si128b\0bits3\0bits80\0ui128"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv_int128), + .key_type_id = 1, + .value_type_id = 4, + .max_entries = 128 * 1024, + .mapv_kind = PPRINT_MAPV_KIND_INT128, +}, +#endif + }; static struct btf_pprint_test_meta { @@ -3787,24 +3927,108 @@ static struct btf_pprint_test_meta { }; +static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) +{ + if (mapv_kind == PPRINT_MAPV_KIND_BASIC) + return sizeof(struct pprint_mapv); -static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i, +#ifdef __SIZEOF_INT128__ + if (mapv_kind == PPRINT_MAPV_KIND_INT128) + return sizeof(struct pprint_mapv_int128); +#endif + + assert(0); +} + +static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, + void *mapv, uint32_t i, int num_cpus, int rounded_value_size) { int cpu; - for (cpu = 0; cpu < num_cpus; cpu++) { - v->ui32 = i + cpu; - v->si32 = -i; - v->unused_bits2a = 3; - v->bits28 = i; - v->unused_bits2b = 3; - v->ui64 = i; - v->aenum = i & 0x03; - v->ui32b = 4; - v->bits2c = 1; - v = (void *)v + rounded_value_size; + if (mapv_kind == PPRINT_MAPV_KIND_BASIC) { + struct pprint_mapv *v = mapv; + + for (cpu = 0; cpu < num_cpus; cpu++) { + v->ui32 = i + cpu; + v->si32 = -i; + v->unused_bits2a = 3; + v->bits28 = i; + v->unused_bits2b = 3; + v->ui64 = i; + v->aenum = i & 0x03; + v->ui32b = 4; + v->bits2c = 1; + v = (void *)v + rounded_value_size; + } } + +#ifdef __SIZEOF_INT128__ + if (mapv_kind == PPRINT_MAPV_KIND_INT128) { + struct pprint_mapv_int128 *v = mapv; + + for (cpu = 0; cpu < num_cpus; cpu++) { + v->si128a = i; + v->si128b = -i; + v->bits3 = i & 0x07; + v->bits80 = (((unsigned __int128)1) << 64) + i; + v->ui128 = (((unsigned __int128)2) << 64) + i; + v = (void *)v + rounded_value_size; + } + } +#endif +} + +ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind, + char *expected_line, ssize_t line_size, + bool percpu_map, unsigned int next_key, + int cpu, void *mapv) +{ + ssize_t nexpected_line = -1; + + if (mapv_kind == PPRINT_MAPV_KIND_BASIC) { + struct pprint_mapv *v = mapv; + + nexpected_line = snprintf(expected_line, line_size, + "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," + "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," + "%u,0x%x}\n", + percpu_map ? "\tcpu" : "", + percpu_map ? cpu : next_key, + v->ui32, v->si32, + v->unused_bits2a, + v->bits28, + v->unused_bits2b, + v->ui64, + v->ui8a[0], v->ui8a[1], + v->ui8a[2], v->ui8a[3], + v->ui8a[4], v->ui8a[5], + v->ui8a[6], v->ui8a[7], + pprint_enum_str[v->aenum], + v->ui32b, + v->bits2c); + } + +#ifdef __SIZEOF_INT128__ + if (mapv_kind == PPRINT_MAPV_KIND_INT128) { + struct pprint_mapv_int128 *v = mapv; + + nexpected_line = snprintf(expected_line, line_size, + "%s%u: {0x%lx,0x%lx,0x%lx," + "0x%lx%016lx,0x%lx%016lx}\n", + percpu_map ? "\tcpu" : "", + percpu_map ? cpu : next_key, + (uint64_t)v->si128a, + (uint64_t)v->si128b, + (uint64_t)v->bits3, + (uint64_t)(v->bits80 >> 64), + (uint64_t)v->bits80, + (uint64_t)(v->ui128 >> 64), + (uint64_t)v->ui128); + } +#endif + + return nexpected_line; } static int check_line(const char *expected_line, int nexpected_line, @@ -3828,10 +4052,10 @@ static int check_line(const char *expected_line, int nexpected_line, static int do_test_pprint(int test_num) { const struct btf_raw_test *test = &pprint_test_template[test_num]; + enum pprint_mapv_kind_t mapv_kind = test->mapv_kind; struct bpf_create_map_attr create_attr = {}; bool ordered_map, lossless_map, percpu_map; int err, ret, num_cpus, rounded_value_size; - struct pprint_mapv *mapv = NULL; unsigned int key, nr_read_elems; int map_fd = -1, btf_fd = -1; unsigned int raw_btf_size; @@ -3840,6 +4064,7 @@ static int do_test_pprint(int test_num) char pin_path[255]; size_t line_len = 0; char *line = NULL; + void *mapv = NULL; uint8_t *raw_btf; ssize_t nread; @@ -3892,7 +4117,7 @@ static int do_test_pprint(int test_num) percpu_map = test->percpu_map; num_cpus = percpu_map ? bpf_num_possible_cpus() : 1; - rounded_value_size = round_up(sizeof(struct pprint_mapv), 8); + rounded_value_size = round_up(get_pprint_mapv_size(mapv_kind), 8); mapv = calloc(num_cpus, rounded_value_size); if (CHECK(!mapv, "mapv allocation failure")) { err = -1; @@ -3900,7 +4125,7 @@ static int do_test_pprint(int test_num) } for (key = 0; key < test->max_entries; key++) { - set_pprint_mapv(mapv, key, num_cpus, rounded_value_size); + set_pprint_mapv(mapv_kind, mapv, key, num_cpus, rounded_value_size); bpf_map_update_elem(map_fd, &key, mapv, 0); } @@ -3924,13 +4149,13 @@ static int do_test_pprint(int test_num) ordered_map = test->ordered_map; lossless_map = test->lossless_map; do { - struct pprint_mapv *cmapv; ssize_t nexpected_line; unsigned int next_key; + void *cmapv; int cpu; next_key = ordered_map ? nr_read_elems : atoi(line); - set_pprint_mapv(mapv, next_key, num_cpus, rounded_value_size); + set_pprint_mapv(mapv_kind, mapv, next_key, num_cpus, rounded_value_size); cmapv = mapv; for (cpu = 0; cpu < num_cpus; cpu++) { @@ -3963,31 +4188,16 @@ static int do_test_pprint(int test_num) break; } - nexpected_line = snprintf(expected_line, sizeof(expected_line), - "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," - "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," - "%u,0x%x}\n", - percpu_map ? "\tcpu" : "", - percpu_map ? cpu : next_key, - cmapv->ui32, cmapv->si32, - cmapv->unused_bits2a, - cmapv->bits28, - cmapv->unused_bits2b, - cmapv->ui64, - cmapv->ui8a[0], cmapv->ui8a[1], - cmapv->ui8a[2], cmapv->ui8a[3], - cmapv->ui8a[4], cmapv->ui8a[5], - cmapv->ui8a[6], cmapv->ui8a[7], - pprint_enum_str[cmapv->aenum], - cmapv->ui32b, - cmapv->bits2c); - + nexpected_line = get_pprint_expected_line(mapv_kind, expected_line, + sizeof(expected_line), + percpu_map, next_key, + cpu, cmapv); err = check_line(expected_line, nexpected_line, sizeof(expected_line), line); if (err == -1) goto done; - cmapv = (void *)cmapv + rounded_value_size; + cmapv = cmapv + rounded_value_size; } if (percpu_map) { @@ -4083,6 +4293,10 @@ static struct prog_info_raw_test { __u32 line_info_rec_size; __u32 nr_jited_ksyms; bool expected_prog_load_failure; + __u32 dead_code_cnt; + __u32 dead_code_mask; + __u32 dead_func_cnt; + __u32 dead_func_mask; } info_raw_tests[] = { { .descr = "func_type (main func + one sub)", @@ -4509,6 +4723,369 @@ static struct prog_info_raw_test { .expected_prog_load_failure = true, }, +{ + .descr = "line_info (dead start)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0/* dead jmp */\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 5, 6), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .dead_code_cnt = 1, + .dead_code_mask = 0x01, +}, + +{ + .descr = "line_info (dead end)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0/* dead jmp */\0return a + b;\0/* dead exit */"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 12), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 11), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 9), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 5, 8), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 6, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .dead_code_cnt = 2, + .dead_code_mask = 0x28, +}, + +{ + .descr = "line_info (dead code + subprog + func_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0sub\0main\0int a=1+1;\0/* dead jmp */" + "\0/* dead */\0/* dead */\0/* dead */\0/* dead */" + "\0/* dead */\0/* dead */\0/* dead */\0/* dead */" + "\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 8), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {14, 3} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(14, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(16, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .dead_code_cnt = 9, + .dead_code_mask = 0x3fe, +}, + +{ + .descr = "line_info (dead subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* live call */" + "\0return 0;\0return 0;\0/* dead */\0/* dead */" + "\0/* dead */\0return bla + 1;\0return bla + 1;" + "\0return bla + 1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_CALL_REL(3), + BPF_CALL_REL(5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 3, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {6, 3}, {9, 5} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .dead_code_cnt = 3, + .dead_code_mask = 0x70, + .dead_func_cnt = 1, + .dead_func_mask = 0x2, +}, + +{ + .descr = "line_info (dead last subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0dead\0main\0int a=1+1;\0/* live call */" + "\0return 0;\0/* dead */\0/* dead */"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_CALL_REL(2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {5, 3} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .dead_code_cnt = 2, + .dead_code_mask = 0x18, + .dead_func_cnt = 1, + .dead_func_mask = 0x2, +}, + +{ + .descr = "line_info (dead subprog + dead start)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* dead */" + "\0return 0;\0return 0;\0return 0;" + "\0/* dead */\0/* dead */\0/* dead */\0/* dead */" + "\0return b + 1;\0return b + 1;\0return b + 1;"), + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_CALL_REL(3), + BPF_CALL_REL(5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 3, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {7, 3}, {10, 5} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(13, 0, NAME_TBD, 2, 9), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .dead_code_cnt = 5, + .dead_code_mask = 0x1e2, + .dead_func_cnt = 1, + .dead_func_mask = 0x2, +}, + +{ + .descr = "line_info (dead subprog + dead start w/ move)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* live call */" + "\0return 0;\0return 0;\0/* dead */\0/* dead */" + "\0/* dead */\0return bla + 1;\0return bla + 1;" + "\0return bla + 1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_CALL_REL(3), + BPF_CALL_REL(5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 3, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {6, 3}, {9, 5} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .dead_code_cnt = 3, + .dead_code_mask = 0x70, + .dead_func_cnt = 1, + .dead_func_mask = 0x2, +}, + +{ + .descr = "line_info (dead end + subprog start w/ no linfo)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0main\0func\0/* main linfo */\0/* func linfo */"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 1, 3), + BPF_CALL_REL(3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 3}, {6, 4}, }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + }; static size_t probe_prog_length(const struct bpf_insn *fp) @@ -4568,6 +5145,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test, struct bpf_func_info *finfo; __u32 info_len, rec_size, i; void *func_info = NULL; + __u32 nr_func_info; int err; /* get necessary lens */ @@ -4577,7 +5155,8 @@ static int test_get_finfo(const struct prog_info_raw_test *test, fprintf(stderr, "%s\n", btf_log_buf); return -1; } - if (CHECK(info.nr_func_info != test->func_info_cnt, + nr_func_info = test->func_info_cnt - test->dead_func_cnt; + if (CHECK(info.nr_func_info != nr_func_info, "incorrect info.nr_func_info (1st) %d", info.nr_func_info)) { return -1; @@ -4598,7 +5177,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test, /* reset info to only retrieve func_info related data */ memset(&info, 0, sizeof(info)); - info.nr_func_info = test->func_info_cnt; + info.nr_func_info = nr_func_info; info.func_info_rec_size = rec_size; info.func_info = ptr_to_u64(func_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); @@ -4607,7 +5186,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test, err = -1; goto done; } - if (CHECK(info.nr_func_info != test->func_info_cnt, + if (CHECK(info.nr_func_info != nr_func_info, "incorrect info.nr_func_info (2nd) %d", info.nr_func_info)) { err = -1; @@ -4621,7 +5200,9 @@ static int test_get_finfo(const struct prog_info_raw_test *test, } finfo = func_info; - for (i = 0; i < test->func_info_cnt; i++) { + for (i = 0; i < nr_func_info; i++) { + if (test->dead_func_mask & (1 << i)) + continue; if (CHECK(finfo->type_id != test->func_info[i][1], "incorrect func_type %u expected %u", finfo->type_id, test->func_info[i][1])) { @@ -4650,6 +5231,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test, struct bpf_prog_info info = {}; __u32 *jited_func_lens = NULL; __u64 cur_func_ksyms; + __u32 dead_insns; int err; jited_cnt = cnt; @@ -4658,7 +5240,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test, if (test->nr_jited_ksyms) nr_jited_ksyms = test->nr_jited_ksyms; else - nr_jited_ksyms = test->func_info_cnt; + nr_jited_ksyms = test->func_info_cnt - test->dead_func_cnt; nr_jited_func_lens = nr_jited_ksyms; info_len = sizeof(struct bpf_prog_info); @@ -4760,12 +5342,20 @@ static int test_get_linfo(const struct prog_info_raw_test *test, goto done; } + dead_insns = 0; + while (test->dead_code_mask & (1 << dead_insns)) + dead_insns++; + CHECK(linfo[0].insn_off, "linfo[0].insn_off:%u", linfo[0].insn_off); for (i = 1; i < cnt; i++) { const struct bpf_line_info *expected_linfo; - expected_linfo = patched_linfo + (i * test->line_info_rec_size); + while (test->dead_code_mask & (1 << (i + dead_insns))) + dead_insns++; + + expected_linfo = patched_linfo + + ((i + dead_insns) * test->line_info_rec_size); if (CHECK(linfo[i].insn_off <= linfo[i - 1].insn_off, "linfo[%u].insn_off:%u <= linfo[%u].insn_off:%u", i, linfo[i].insn_off, @@ -4923,7 +5513,9 @@ static int do_test_info_raw(unsigned int test_num) if (err) goto done; - err = test_get_linfo(test, patched_linfo, attr.line_info_cnt, prog_fd); + err = test_get_linfo(test, patched_linfo, + attr.line_info_cnt - test->dead_code_cnt, + prog_fd); if (err) goto done; diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c index 12b784afba31..01f0c634d548 100644 --- a/tools/testing/selftests/bpf/test_flow_dissector.c +++ b/tools/testing/selftests/bpf/test_flow_dissector.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -25,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index e2b9eee37187..1dfef77cff6f 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -633,7 +633,6 @@ static void test_stackmap(int task, void *data) close(fd); } -#include #include #include #include diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 126fc624290d..d8940b8b2f8d 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -39,6 +39,7 @@ typedef __u16 __sum16; #include "bpf_endian.h" #include "bpf_rlimit.h" #include "trace_helpers.h" +#include "flow_dissector_load.h" static int error_cnt, pass_cnt; static bool jit_enabled; @@ -53,9 +54,10 @@ static struct { } __packed pkt_v4 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, - .iph.protocol = 6, + .iph.protocol = IPPROTO_TCP, .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.urg_ptr = 123, + .tcp.doff = 5, }; /* ipv6 test vector */ @@ -65,9 +67,10 @@ static struct { struct tcphdr tcp; } __packed pkt_v6 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), - .iph.nexthdr = 6, + .iph.nexthdr = IPPROTO_TCP, .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.urg_ptr = 123, + .tcp.doff = 5, }; #define _CHECK(condition, tag, duration, format...) ({ \ @@ -1188,7 +1191,9 @@ static void test_stacktrace_build_id(void) int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; int build_id_matches = 0; + int retry = 1; +retry: err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) goto out; @@ -1301,6 +1306,19 @@ static void test_stacktrace_build_id(void) previous_key = key; } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); + /* stack_map_get_build_id_offset() is racy and sometimes can return + * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID; + * try it one more time. + */ + if (build_id_matches < 1 && retry--) { + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); + close(pmu_fd); + bpf_object__close(obj); + printf("%s:WARN:Didn't find expected build ID from the map, retrying\n", + __func__); + goto retry; + } + if (CHECK(build_id_matches < 1, "build id match", "Didn't find expected build ID from the map\n")) goto disable_pmu; @@ -1341,7 +1359,9 @@ static void test_stacktrace_build_id_nmi(void) int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; int build_id_matches = 0; + int retry = 1; +retry: err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) return; @@ -1436,6 +1456,19 @@ static void test_stacktrace_build_id_nmi(void) previous_key = key; } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); + /* stack_map_get_build_id_offset() is racy and sometimes can return + * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID; + * try it one more time. + */ + if (build_id_matches < 1 && retry--) { + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); + close(pmu_fd); + bpf_object__close(obj); + printf("%s:WARN:Didn't find expected build ID from the map, retrying\n", + __func__); + goto retry; + } + if (CHECK(build_id_matches < 1, "build id match", "Didn't find expected build ID from the map\n")) goto disable_pmu; @@ -1882,6 +1915,76 @@ out: bpf_object__close(obj); } +#define CHECK_FLOW_KEYS(desc, got, expected) \ + CHECK(memcmp(&got, &expected, sizeof(got)) != 0, \ + desc, \ + "nhoff=%u/%u " \ + "thoff=%u/%u " \ + "addr_proto=0x%x/0x%x " \ + "is_frag=%u/%u " \ + "is_first_frag=%u/%u " \ + "is_encap=%u/%u " \ + "n_proto=0x%x/0x%x " \ + "sport=%u/%u " \ + "dport=%u/%u\n", \ + got.nhoff, expected.nhoff, \ + got.thoff, expected.thoff, \ + got.addr_proto, expected.addr_proto, \ + got.is_frag, expected.is_frag, \ + got.is_first_frag, expected.is_first_frag, \ + got.is_encap, expected.is_encap, \ + got.n_proto, expected.n_proto, \ + got.sport, expected.sport, \ + got.dport, expected.dport) + +static struct bpf_flow_keys pkt_v4_flow_keys = { + .nhoff = 0, + .thoff = sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = bpf_htons(ETH_P_IP), +}; + +static struct bpf_flow_keys pkt_v6_flow_keys = { + .nhoff = 0, + .thoff = sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = bpf_htons(ETH_P_IPV6), +}; + +static void test_flow_dissector(void) +{ + struct bpf_flow_keys flow_keys; + struct bpf_object *obj; + __u32 duration, retval; + int err, prog_fd; + __u32 size; + + err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector", + "jmp_table", &prog_fd); + if (err) { + error_cnt++; + return; + } + + err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4), + &flow_keys, &size, &retval, &duration); + CHECK(size != sizeof(flow_keys) || err || retval != 1, "ipv4", + "err %d errno %d retval %d duration %d size %u/%lu\n", + err, errno, retval, duration, size, sizeof(flow_keys)); + CHECK_FLOW_KEYS("ipv4_flow_keys", flow_keys, pkt_v4_flow_keys); + + err = bpf_prog_test_run(prog_fd, 10, &pkt_v6, sizeof(pkt_v6), + &flow_keys, &size, &retval, &duration); + CHECK(size != sizeof(flow_keys) || err || retval != 1, "ipv6", + "err %d errno %d retval %d duration %d size %u/%lu\n", + err, errno, retval, duration, size, sizeof(flow_keys)); + CHECK_FLOW_KEYS("ipv6_flow_keys", flow_keys, pkt_v6_flow_keys); + + bpf_object__close(obj); +} + int main(void) { srand(time(NULL)); @@ -1909,6 +2012,7 @@ int main(void) test_reference_tracking(); test_queue_stack_map(QUEUE); test_queue_stack_map(STACK); + test_flow_dissector(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c index fc7832ee566b..e51d63786ff8 100644 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -158,10 +158,8 @@ static int run_test(int cgfd) bpf_object__for_each_program(prog, pobj) { prog_name = bpf_program__title(prog, /*needs_copy*/ false); - if (libbpf_attach_type_by_name(prog_name, &attach_type)) { - log_err("Unexpected prog: %s", prog_name); + if (libbpf_attach_type_by_name(prog_name, &attach_type)) goto err; - } err = bpf_prog_attach(bpf_program__fd(prog), cgfd, attach_type, BPF_F_ALLOW_OVERRIDE); diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index e85a771f607b..3845144e2c91 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 4e4353711a86..86152d9ae95b 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -148,17 +148,17 @@ int main(int argc, char **argv) pthread_create(&tid, NULL, poller_thread, (void *)&pmu_fd); sprintf(test_script, - "/usr/sbin/iptables -A INPUT -p tcp --dport %d -j DROP", + "iptables -A INPUT -p tcp --dport %d -j DROP", TESTPORT); system(test_script); sprintf(test_script, - "/usr/bin/nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", + "nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", TESTPORT); system(test_script); sprintf(test_script, - "/usr/sbin/iptables -D INPUT -p tcp --dport %d -j DROP", + "iptables -D INPUT -p tcp --dport %d -j DROP", TESTPORT); system(test_script); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 2fd90d456892..c5e22422a852 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -211,15394 +211,46 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self) BPF_MOV64_IMM(BPF_REG_5, 0), \ BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp) +/* BPF_DIRECT_PKT_R2 contains 7 instructions, it initializes default return + * value into 0 and does necessary preparation for direct packet access + * through r2. The allowed access range is 8 bytes. + */ +#define BPF_DIRECT_PKT_R2 \ + BPF_MOV64_IMM(BPF_REG_0, 0), \ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \ + offsetof(struct __sk_buff, data)), \ + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \ + offsetof(struct __sk_buff, data_end)), \ + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), \ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), \ + BPF_JMP_REG(BPF_JLE, BPF_REG_4, BPF_REG_3, 1), \ + BPF_EXIT_INSN() + +/* BPF_RAND_UEXT_R7 contains 4 instructions, it initializes R7 into a random + * positive u32, and zero-extend it into 64-bit. + */ +#define BPF_RAND_UEXT_R7 \ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, \ + BPF_FUNC_get_prandom_u32), \ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), \ + BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 33), \ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 33) + +/* BPF_RAND_SEXT_R7 contains 5 instructions, it initializes R7 into a random + * negative u32, and sign-extend it into 64-bit. + */ +#define BPF_RAND_SEXT_R7 \ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, \ + BPF_FUNC_get_prandom_u32), \ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), \ + BPF_ALU64_IMM(BPF_OR, BPF_REG_7, 0x80000000), \ + BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 32), \ + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_7, 32) + static struct bpf_test tests[] = { - { - "add+sub+mul", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 2), - BPF_MOV64_IMM(BPF_REG_2, 3), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 3), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = -3, - }, - { - "DIV32 by 0, zero check 1", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, 1), - BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, - }, - { - "DIV32 by 0, zero check 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), - BPF_MOV32_IMM(BPF_REG_2, 1), - BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, - }, - { - "DIV64 by 0, zero check", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, 1), - BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, - }, - { - "MOD32 by 0, zero check 1", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, 1), - BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, - }, - { - "MOD32 by 0, zero check 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), - BPF_MOV32_IMM(BPF_REG_2, 1), - BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, - }, - { - "MOD64 by 0, zero check", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, 1), - BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, - }, - { - "DIV32 by 0, zero check ok, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_MOV32_IMM(BPF_REG_1, 2), - BPF_MOV32_IMM(BPF_REG_2, 16), - BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 8, - }, - { - "DIV32 by 0, zero check 1, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, - }, - { - "DIV32 by 0, zero check 2, cls", - .insns = { - BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, - }, - { - "DIV64 by 0, zero check, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, - }, - { - "MOD32 by 0, zero check ok, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_MOV32_IMM(BPF_REG_1, 3), - BPF_MOV32_IMM(BPF_REG_2, 5), - BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 2, - }, - { - "MOD32 by 0, zero check 1, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, - }, - { - "MOD32 by 0, zero check 2, cls", - .insns = { - BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, - }, - { - "MOD64 by 0, zero check 1, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_0, 2), - BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 2, - }, - { - "MOD64 by 0, zero check 2, cls", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_0, -1), - BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = -1, - }, - /* Just make sure that JITs used udiv/umod as otherwise we get - * an exception from INT_MIN/-1 overflow similarly as with div - * by zero. - */ - { - "DIV32 overflow, check 1", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_0, INT_MIN), - BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, - }, - { - "DIV32 overflow, check 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, INT_MIN), - BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, - }, - { - "DIV64 overflow, check 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, -1), - BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), - BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, - }, - { - "DIV64 overflow, check 2", - .insns = { - BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), - BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, - }, - { - "MOD32 overflow, check 1", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_0, INT_MIN), - BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = INT_MIN, - }, - { - "MOD32 overflow, check 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, INT_MIN), - BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = INT_MIN, - }, - { - "MOD64 overflow, check 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, -1), - BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), - BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, - }, - { - "MOD64 overflow, check 2", - .insns = { - BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), - BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, - }, - { - "xor32 zero extend check", - .insns = { - BPF_MOV32_IMM(BPF_REG_2, -1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32), - BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff), - BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2), - BPF_MOV32_IMM(BPF_REG_0, 2), - BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1), - BPF_MOV32_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, - }, - { - "empty prog", - .insns = { - }, - .errstr = "unknown opcode 00", - .result = REJECT, - }, - { - "only exit insn", - .insns = { - BPF_EXIT_INSN(), - }, - .errstr = "R0 !read_ok", - .result = REJECT, - }, - { - "unreachable", - .insns = { - BPF_EXIT_INSN(), - BPF_EXIT_INSN(), - }, - .errstr = "unreachable", - .result = REJECT, - }, - { - "unreachable2", - .insns = { - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "unreachable", - .result = REJECT, - }, - { - "out of range jump", - .insns = { - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_EXIT_INSN(), - }, - .errstr = "jump out of range", - .result = REJECT, - }, - { - "out of range jump2", - .insns = { - BPF_JMP_IMM(BPF_JA, 0, 0, -2), - BPF_EXIT_INSN(), - }, - .errstr = "jump out of range", - .result = REJECT, - }, - { - "test1 ld_imm64", - .insns = { - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_LD_IMM64(BPF_REG_0, 0), - BPF_LD_IMM64(BPF_REG_0, 0), - BPF_LD_IMM64(BPF_REG_0, 1), - BPF_LD_IMM64(BPF_REG_0, 1), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .errstr = "invalid BPF_LD_IMM insn", - .errstr_unpriv = "R1 pointer comparison", - .result = REJECT, - }, - { - "test2 ld_imm64", - .insns = { - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_LD_IMM64(BPF_REG_0, 0), - BPF_LD_IMM64(BPF_REG_0, 0), - BPF_LD_IMM64(BPF_REG_0, 1), - BPF_LD_IMM64(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .errstr = "invalid BPF_LD_IMM insn", - .errstr_unpriv = "R1 pointer comparison", - .result = REJECT, - }, - { - "test3 ld_imm64", - .insns = { - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), - BPF_LD_IMM64(BPF_REG_0, 0), - BPF_LD_IMM64(BPF_REG_0, 0), - BPF_LD_IMM64(BPF_REG_0, 1), - BPF_LD_IMM64(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_ld_imm64 insn", - .result = REJECT, - }, - { - "test4 ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_ld_imm64 insn", - .result = REJECT, - }, - { - "test5 ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), - }, - .errstr = "invalid bpf_ld_imm64 insn", - .result = REJECT, - }, - { - "test6 ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), - BPF_RAW_INSN(0, 0, 0, 0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "test7 ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), - BPF_RAW_INSN(0, 0, 0, 0, 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 1, - }, - { - "test8 ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 1, 1), - BPF_RAW_INSN(0, 0, 0, 0, 1), - BPF_EXIT_INSN(), - }, - .errstr = "uses reserved fields", - .result = REJECT, - }, - { - "test9 ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), - BPF_RAW_INSN(0, 0, 0, 1, 1), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_ld_imm64 insn", - .result = REJECT, - }, - { - "test10 ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), - BPF_RAW_INSN(0, BPF_REG_1, 0, 0, 1), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_ld_imm64 insn", - .result = REJECT, - }, - { - "test11 ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), - BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_ld_imm64 insn", - .result = REJECT, - }, - { - "test12 ld_imm64", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1), - BPF_RAW_INSN(0, 0, 0, 0, 1), - BPF_EXIT_INSN(), - }, - .errstr = "not pointing to valid bpf_map", - .result = REJECT, - }, - { - "test13 ld_imm64", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1), - BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_ld_imm64 insn", - .result = REJECT, - }, - { - "arsh32 on imm", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "arsh32 on imm 2", - .insns = { - BPF_LD_IMM64(BPF_REG_0, 0x1122334485667788), - BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 7), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = -16069393, - }, - { - "arsh32 on reg", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_IMM(BPF_REG_1, 5), - BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "arsh32 on reg 2", - .insns = { - BPF_LD_IMM64(BPF_REG_0, 0xffff55667788), - BPF_MOV64_IMM(BPF_REG_1, 15), - BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 43724, - }, - { - "arsh64 on imm", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_0, 5), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "arsh64 on reg", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_IMM(BPF_REG_1, 5), - BPF_ALU64_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "no bpf_exit", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2), - }, - .errstr = "not an exit", - .result = REJECT, - }, - { - "loop (back-edge)", - .insns = { - BPF_JMP_IMM(BPF_JA, 0, 0, -1), - BPF_EXIT_INSN(), - }, - .errstr = "back-edge", - .result = REJECT, - }, - { - "loop2 (back-edge)", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_JMP_IMM(BPF_JA, 0, 0, -4), - BPF_EXIT_INSN(), - }, - .errstr = "back-edge", - .result = REJECT, - }, - { - "conditional loop", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), - BPF_EXIT_INSN(), - }, - .errstr = "back-edge", - .result = REJECT, - }, - { - "read uninitialized register", - .insns = { - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .errstr = "R2 !read_ok", - .result = REJECT, - }, - { - "read invalid register", - .insns = { - BPF_MOV64_REG(BPF_REG_0, -1), - BPF_EXIT_INSN(), - }, - .errstr = "R15 is invalid", - .result = REJECT, - }, - { - "program doesn't init R0 before exit", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr = "R0 !read_ok", - .result = REJECT, - }, - { - "program doesn't init R0 before exit in all branches", - .insns = { - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .errstr = "R0 !read_ok", - .errstr_unpriv = "R1 pointer comparison", - .result = REJECT, - }, - { - "stack out of bounds", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid stack", - .result = REJECT, - }, - { - "invalid call insn1", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "unknown opcode 8d", - .result = REJECT, - }, - { - "invalid call insn2", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 1, 0), - BPF_EXIT_INSN(), - }, - .errstr = "BPF_CALL uses reserved", - .result = REJECT, - }, - { - "invalid function call", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 1234567), - BPF_EXIT_INSN(), - }, - .errstr = "invalid func unknown#1234567", - .result = REJECT, - }, - { - "uninitialized stack1", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 2 }, - .errstr = "invalid indirect read from stack", - .result = REJECT, - }, - { - "uninitialized stack2", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8), - BPF_EXIT_INSN(), - }, - .errstr = "invalid read from stack", - .result = REJECT, - }, - { - "invalid fp arithmetic", - /* If this gets ever changed, make sure JITs can deal with it. */ - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 8), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 subtraction from stack pointer", - .result = REJECT, - }, - { - "non-invalid fp arithmetic", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "invalid argument register", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_cgroup_classid), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_cgroup_classid), - BPF_EXIT_INSN(), - }, - .errstr = "R1 !read_ok", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "non-invalid argument register", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_cgroup_classid), - BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_6), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_cgroup_classid), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "check valid spill/fill", - .insns = { - /* spill R1(ctx) into stack */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - /* fill it back into R2 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), - /* should be able to access R0 = *(R2 + 8) */ - /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */ - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R0 leaks addr", - .result = ACCEPT, - .result_unpriv = REJECT, - .retval = POINTER_VALUE, - }, - { - "check valid spill/fill, skb mark", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = ACCEPT, - }, - { - "check corrupted spill/fill", - .insns = { - /* spill R1(ctx) into stack */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - /* mess up with R1 pointer on stack */ - BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23), - /* fill back into R0 is fine for priv. - * R0 now becomes SCALAR_VALUE. - */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - /* Load from R0 should fail. */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "attempt to corrupt spilled", - .errstr = "R0 invalid mem access 'inv", - .result = REJECT, - }, - { - "check corrupted spill/fill, LSB", - .insns = { - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_ST_MEM(BPF_H, BPF_REG_10, -8, 0xcafe), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "attempt to corrupt spilled", - .result_unpriv = REJECT, - .result = ACCEPT, - .retval = POINTER_VALUE, - }, - { - "check corrupted spill/fill, MSB", - .insns = { - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x12345678), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "attempt to corrupt spilled", - .result_unpriv = REJECT, - .result = ACCEPT, - .retval = POINTER_VALUE, - }, - { - "invalid src register in STX", - .insns = { - BPF_STX_MEM(BPF_B, BPF_REG_10, -1, -1), - BPF_EXIT_INSN(), - }, - .errstr = "R15 is invalid", - .result = REJECT, - }, - { - "invalid dst register in STX", - .insns = { - BPF_STX_MEM(BPF_B, 14, BPF_REG_10, -1), - BPF_EXIT_INSN(), - }, - .errstr = "R14 is invalid", - .result = REJECT, - }, - { - "invalid dst register in ST", - .insns = { - BPF_ST_MEM(BPF_B, 14, -1, -1), - BPF_EXIT_INSN(), - }, - .errstr = "R14 is invalid", - .result = REJECT, - }, - { - "invalid src register in LDX", - .insns = { - BPF_LDX_MEM(BPF_B, BPF_REG_0, 12, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R12 is invalid", - .result = REJECT, - }, - { - "invalid dst register in LDX", - .insns = { - BPF_LDX_MEM(BPF_B, 11, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R11 is invalid", - .result = REJECT, - }, - { - "junk insn", - .insns = { - BPF_RAW_INSN(0, 0, 0, 0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "unknown opcode 00", - .result = REJECT, - }, - { - "junk insn2", - .insns = { - BPF_RAW_INSN(1, 0, 0, 0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "BPF_LDX uses reserved fields", - .result = REJECT, - }, - { - "junk insn3", - .insns = { - BPF_RAW_INSN(-1, 0, 0, 0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "unknown opcode ff", - .result = REJECT, - }, - { - "junk insn4", - .insns = { - BPF_RAW_INSN(-1, -1, -1, -1, -1), - BPF_EXIT_INSN(), - }, - .errstr = "unknown opcode ff", - .result = REJECT, - }, - { - "junk insn5", - .insns = { - BPF_RAW_INSN(0x7f, -1, -1, -1, -1), - BPF_EXIT_INSN(), - }, - .errstr = "BPF_ALU uses reserved fields", - .result = REJECT, - }, - { - "misaligned read from stack", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4), - BPF_EXIT_INSN(), - }, - .errstr = "misaligned stack access", - .result = REJECT, - }, - { - "invalid map_fd for function call", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_delete_elem), - BPF_EXIT_INSN(), - }, - .errstr = "fd 0 is not pointing to valid bpf_map", - .result = REJECT, - }, - { - "don't check return value before access", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "R0 invalid mem access 'map_value_or_null'", - .result = REJECT, - }, - { - "access memory with incorrect alignment", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "misaligned value access", - .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, - }, - { - "sometimes access memory with incorrect alignment", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - BPF_EXIT_INSN(), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "R0 invalid mem access", - .errstr_unpriv = "R0 leaks addr", - .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, - }, - { - "jump test 1", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -8), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 3), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 pointer comparison", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "jump test 2", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 14), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 11), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 2), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 8), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 5), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 2), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 pointer comparison", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "jump test 3", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_JMP_IMM(BPF_JA, 0, 0, 19), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 3), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_JMP_IMM(BPF_JA, 0, 0, 15), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 3), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32), - BPF_JMP_IMM(BPF_JA, 0, 0, 11), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 3), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -40), - BPF_JMP_IMM(BPF_JA, 0, 0, 7), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 3), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48), - BPF_JMP_IMM(BPF_JA, 0, 0, 3), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -56), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_delete_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 24 }, - .errstr_unpriv = "R1 pointer comparison", - .result_unpriv = REJECT, - .result = ACCEPT, - .retval = -ENOENT, - }, - { - "jump test 4", - .insns = { - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 pointer comparison", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "jump test 5", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 pointer comparison", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "access skb fields ok", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, len)), - BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, pkt_type)), - BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, queue_mapping)), - BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, protocol)), - BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, vlan_present)), - BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, vlan_tci)), - BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, napi_id)), - BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "access skb fields bad1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -4), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "access skb fields bad2", - .insns = { - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, pkt_type)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .errstr = "different pointers", - .errstr_unpriv = "R1 pointer comparison", - .result = REJECT, - }, - { - "access skb fields bad3", - .insns = { - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, pkt_type)), - BPF_EXIT_INSN(), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_JMP_IMM(BPF_JA, 0, 0, -12), - }, - .fixup_map_hash_8b = { 6 }, - .errstr = "different pointers", - .errstr_unpriv = "R1 pointer comparison", - .result = REJECT, - }, - { - "access skb fields bad4", - .insns = { - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 3), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, - offsetof(struct __sk_buff, len)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_JMP_IMM(BPF_JA, 0, 0, -13), - }, - .fixup_map_hash_8b = { 7 }, - .errstr = "different pointers", - .errstr_unpriv = "R1 pointer comparison", - .result = REJECT, - }, - { - "invalid access __sk_buff family", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, family)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "invalid access __sk_buff remote_ip4", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip4)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "invalid access __sk_buff local_ip4", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, local_ip4)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "invalid access __sk_buff remote_ip6", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "invalid access __sk_buff local_ip6", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "invalid access __sk_buff remote_port", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, remote_port)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "invalid access __sk_buff remote_port", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, local_port)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "valid access __sk_buff family", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, family)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "valid access __sk_buff remote_ip4", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip4)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "valid access __sk_buff local_ip4", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, local_ip4)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "valid access __sk_buff remote_ip6", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6[3])), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "valid access __sk_buff local_ip6", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6[3])), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "valid access __sk_buff remote_port", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, remote_port)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "valid access __sk_buff remote_port", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, local_port)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "invalid access of tc_classid for SK_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tc_classid)), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - .errstr = "invalid bpf_context access", - }, - { - "invalid access of skb->mark for SK_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - .errstr = "invalid bpf_context access", - }, - { - "check skb->mark is not writeable by SK_SKB", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - .errstr = "invalid bpf_context access", - }, - { - "check skb->tc_index is writeable by SK_SKB", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, tc_index)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "check skb->priority is writeable by SK_SKB", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, priority)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "direct packet read for SK_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "direct packet write for SK_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "overlapping checks for direct packet access SK_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "valid access family in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, family)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - }, - { - "valid access remote_ip4 in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, remote_ip4)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - }, - { - "valid access local_ip4 in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, local_ip4)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - }, - { - "valid access remote_port in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, remote_port)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - }, - { - "valid access local_port in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, local_port)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - }, - { - "valid access remote_ip6 in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, remote_ip6[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, remote_ip6[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, remote_ip6[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, remote_ip6[3])), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "valid access local_ip6 in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, local_ip6[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, local_ip6[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, local_ip6[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, local_ip6[3])), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_SKB, - }, - { - "valid access size in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct sk_msg_md, size)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - }, - { - "invalid 64B read of size in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, size)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - }, - { - "invalid read past end of SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, size) + 4), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - }, - { - "invalid read offset in SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, family) + 1), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "direct packet read for SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, data)), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, - offsetof(struct sk_msg_md, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - }, - { - "direct packet write for SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, data)), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, - offsetof(struct sk_msg_md, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - }, - { - "overlapping checks for direct packet access SK_MSG", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, data)), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, - offsetof(struct sk_msg_md, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SK_MSG, - }, - { - "check skb->mark is not writeable by sockets", - .insns = { - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .errstr_unpriv = "R1 leaks addr", - .result = REJECT, - }, - { - "check skb->tc_index is not writeable by sockets", - .insns = { - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, - offsetof(struct __sk_buff, tc_index)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .errstr_unpriv = "R1 leaks addr", - .result = REJECT, - }, - { - "check cb access: byte", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0])), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0]) + 1), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0]) + 2), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0]) + 3), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[1])), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[1]) + 1), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[1]) + 2), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[1]) + 3), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[2])), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[2]) + 1), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[2]) + 2), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[2]) + 3), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[3])), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[3]) + 1), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[3]) + 2), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[3]) + 3), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4])), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 1), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 2), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 3), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0])), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0]) + 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0]) + 2), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0]) + 3), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[1])), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[1]) + 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[1]) + 2), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[1]) + 3), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[2])), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[2]) + 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[2]) + 2), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[2]) + 3), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[3])), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[3]) + 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[3]) + 2), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[3]) + 3), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4])), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4]) + 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4]) + 2), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4]) + 3), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "__sk_buff->hash, offset 0, byte store not permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, hash)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "__sk_buff->tc_index, offset 3, byte store not permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, tc_index) + 3), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check skb->hash byte load permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash)), -#else - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash) + 3), -#endif - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "check skb->hash byte load permitted 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash) + 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "check skb->hash byte load permitted 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash) + 2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "check skb->hash byte load permitted 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash) + 3), -#else - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash)), -#endif - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "check cb access: byte, wrong type", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0])), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "check cb access: half", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0])), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0]) + 2), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[1])), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[1]) + 2), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[2])), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[2]) + 2), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[3])), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[3]) + 2), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4])), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 2), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0])), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0]) + 2), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[1])), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[1]) + 2), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[2])), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[2]) + 2), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[3])), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[3]) + 2), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4])), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4]) + 2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "check cb access: half, unaligned", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0]) + 1), - BPF_EXIT_INSN(), - }, - .errstr = "misaligned context access", - .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, - }, - { - "check __sk_buff->hash, offset 0, half store not permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, hash)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check __sk_buff->tc_index, offset 2, half store not permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, tc_index) + 2), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check skb->hash half load permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash)), -#else - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash) + 2), -#endif - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "check skb->hash half load permitted 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash) + 2), -#else - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash)), -#endif - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "check skb->hash half load not permitted, unaligned 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash) + 1), -#else - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash) + 3), -#endif - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check skb->hash half load not permitted, unaligned 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash) + 3), -#else - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, hash) + 1), -#endif - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "check cb access: half, wrong type", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0])), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "check cb access: word", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[1])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[2])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[3])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[3])), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4])), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "check cb access: word, unaligned 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0]) + 2), - BPF_EXIT_INSN(), - }, - .errstr = "misaligned context access", - .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, - }, - { - "check cb access: word, unaligned 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 1), - BPF_EXIT_INSN(), - }, - .errstr = "misaligned context access", - .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, - }, - { - "check cb access: word, unaligned 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 2), - BPF_EXIT_INSN(), - }, - .errstr = "misaligned context access", - .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, - }, - { - "check cb access: word, unaligned 4", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4]) + 3), - BPF_EXIT_INSN(), - }, - .errstr = "misaligned context access", - .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, - }, - { - "check cb access: double", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0])), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[2])), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0])), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[2])), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "check cb access: double, unaligned 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[1])), - BPF_EXIT_INSN(), - }, - .errstr = "misaligned context access", - .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, - }, - { - "check cb access: double, unaligned 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[3])), - BPF_EXIT_INSN(), - }, - .errstr = "misaligned context access", - .result = REJECT, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, - }, - { - "check cb access: double, oob 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[4])), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check cb access: double, oob 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4])), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check __sk_buff->ifindex dw store not permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, ifindex)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check __sk_buff->ifindex dw load not permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, ifindex)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check cb access: double, wrong type", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0])), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "check out of range skb->cb access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0]) + 256), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .errstr_unpriv = "", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_ACT, - }, - { - "write skb fields from socket prog", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[4])), - BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tc_index)), - BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, - offsetof(struct __sk_buff, cb[0])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, - offsetof(struct __sk_buff, cb[2])), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .errstr_unpriv = "R1 leaks addr", - .result_unpriv = REJECT, - }, - { - "write skb fields from tc_cls_act prog", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, cb[0])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, mark)), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tc_index)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, tc_index)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[3])), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tstamp)), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, tstamp)), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "", - .result_unpriv = REJECT, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "PTR_TO_STACK store/load", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0xfaceb00c, - }, - { - "PTR_TO_STACK store/load - bad alignment on off", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8", - }, - { - "PTR_TO_STACK store/load - bad alignment on reg", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8", - }, - { - "PTR_TO_STACK store/load - out of bounds low", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid stack off=-79992 size=8", - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - }, - { - "PTR_TO_STACK store/load - out of bounds high", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid stack off=0 size=8", - }, - { - "unpriv: return pointer", - .insns = { - BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 leaks addr", - .retval = POINTER_VALUE, - }, - { - "unpriv: add const to pointer", - .insns = { - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "unpriv: add pointer to pointer", - .insns = { - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R1 pointer += pointer", - }, - { - "unpriv: neg pointer", - .insns = { - BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R1 pointer arithmetic", - }, - { - "unpriv: cmp pointer with const", - .insns = { - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R1 pointer comparison", - }, - { - "unpriv: cmp pointer with pointer", - .insns = { - BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R10 pointer comparison", - }, - { - "unpriv: check that printk is disallowed", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_trace_printk), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "unknown func bpf_trace_printk#6", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "unpriv: pass pointer to helper function", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_update_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr_unpriv = "R4 leaks addr", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "unpriv: indirectly pass pointer on stack to helper function", - .insns = { - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "invalid indirect read from stack off -8+0 size 8", - .result = REJECT, - }, - { - "unpriv: mangle pointer on stack 1", - .insns = { - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8), - BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "attempt to corrupt spilled", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "unpriv: mangle pointer on stack 2", - .insns = { - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8), - BPF_ST_MEM(BPF_B, BPF_REG_10, -1, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "attempt to corrupt spilled", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "unpriv: read pointer from stack in small chunks", - .insns = { - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid size", - .result = REJECT, - }, - { - "unpriv: write pointer into ctx", - .insns = { - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 leaks addr", - .result_unpriv = REJECT, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "unpriv: spill/fill of ctx", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "unpriv: spill/fill of ctx 2", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_hash_recalc), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "unpriv: spill/fill of ctx 3", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_hash_recalc), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R1 type=fp expected=ctx", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "unpriv: spill/fill of ctx 4", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_10, - BPF_REG_0, -8, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_hash_recalc), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R1 type=inv expected=ctx", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "unpriv: spill/fill of different pointers stx", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, 42), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, - offsetof(struct __sk_buff, mark)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "same insn cannot be used with different pointers", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "unpriv: spill/fill of different pointers stx - ctx and sock", - .insns = { - BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), - /* struct bpf_sock *sock = bpf_sock_lookup(...); */ - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - /* u64 foo; */ - /* void *target = &foo; */ - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), - /* if (skb == NULL) *target = sock; */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), - /* else *target = skb; */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - /* struct __sk_buff *skb = *target; */ - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), - /* skb->mark = 42; */ - BPF_MOV64_IMM(BPF_REG_3, 42), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, - offsetof(struct __sk_buff, mark)), - /* if (sk) bpf_sk_release(sk) */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "type=ctx expected=sock", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "unpriv: spill/fill of different pointers stx - leak sock", - .insns = { - BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), - /* struct bpf_sock *sock = bpf_sock_lookup(...); */ - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - /* u64 foo; */ - /* void *target = &foo; */ - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), - /* if (skb == NULL) *target = sock; */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), - /* else *target = skb; */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - /* struct __sk_buff *skb = *target; */ - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), - /* skb->mark = 42; */ - BPF_MOV64_IMM(BPF_REG_3, 42), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .result = REJECT, - //.errstr = "same insn cannot be used with different pointers", - .errstr = "Unreleased reference", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "unpriv: spill/fill of different pointers stx - sock and ctx (read)", - .insns = { - BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), - /* struct bpf_sock *sock = bpf_sock_lookup(...); */ - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - /* u64 foo; */ - /* void *target = &foo; */ - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), - /* if (skb) *target = skb */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - /* else *target = sock */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), - /* struct bpf_sock *sk = *target; */ - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), - /* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct bpf_sock, mark)), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "same insn cannot be used with different pointers", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "unpriv: spill/fill of different pointers stx - sock and ctx (write)", - .insns = { - BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), - /* struct bpf_sock *sock = bpf_sock_lookup(...); */ - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - /* u64 foo; */ - /* void *target = &foo; */ - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), - /* if (skb) *target = skb */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - /* else *target = sock */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), - /* struct bpf_sock *sk = *target; */ - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), - /* if (sk) sk->mark = 42; bpf_sk_release(sk); */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), - BPF_MOV64_IMM(BPF_REG_3, 42), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, - offsetof(struct bpf_sock, mark)), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - //.errstr = "same insn cannot be used with different pointers", - .errstr = "cannot write into socket", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "unpriv: spill/fill of different pointers ldx", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, - -(__s32)offsetof(struct bpf_perf_event_data, - sample_period) - 8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, - offsetof(struct bpf_perf_event_data, - sample_period)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "same insn cannot be used with different pointers", - .prog_type = BPF_PROG_TYPE_PERF_EVENT, - }, - { - "unpriv: write pointer into map elem value", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "alu32: mov u32 const", - .insns = { - BPF_MOV32_IMM(BPF_REG_7, 0), - BPF_ALU32_IMM(BPF_AND, BPF_REG_7, 1), - BPF_MOV32_REG(BPF_REG_0, BPF_REG_7), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "unpriv: partial copy of pointer", - .insns = { - BPF_MOV32_REG(BPF_REG_1, BPF_REG_10), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R10 partial copy", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "unpriv: pass pointer to tail_call", - .insns = { - BPF_MOV64_REG(BPF_REG_3, BPF_REG_1), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_prog1 = { 1 }, - .errstr_unpriv = "R3 leaks addr into helper", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "unpriv: cmp map pointer with zero", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1 }, - .errstr_unpriv = "R1 pointer comparison", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "unpriv: write into frame pointer", - .insns = { - BPF_MOV64_REG(BPF_REG_10, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "frame pointer is read only", - .result = REJECT, - }, - { - "unpriv: spill/fill frame pointer", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "frame pointer is read only", - .result = REJECT, - }, - { - "unpriv: cmp of frame pointer", - .insns = { - BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R10 pointer comparison", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "unpriv: adding of fp", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "unpriv: cmp of stack pointer", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R2 pointer comparison", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "runtime/jit: tail_call within bounds, prog once", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_prog1 = { 1 }, - .result = ACCEPT, - .retval = 42, - }, - { - "runtime/jit: tail_call within bounds, prog loop", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_prog1 = { 1 }, - .result = ACCEPT, - .retval = 41, - }, - { - "runtime/jit: tail_call within bounds, no prog", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, 2), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_prog1 = { 1 }, - .result = ACCEPT, - .retval = 1, - }, - { - "runtime/jit: tail_call out of bounds", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, 256), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .fixup_prog1 = { 1 }, - .result = ACCEPT, - .retval = 2, - }, - { - "runtime/jit: pass negative index to tail_call", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, -1), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .fixup_prog1 = { 1 }, - .result = ACCEPT, - .retval = 2, - }, - { - "runtime/jit: pass > 32bit index to tail_call", - .insns = { - BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .fixup_prog1 = { 2 }, - .result = ACCEPT, - .retval = 42, - /* Verifier rewrite for unpriv skips tail call here. */ - .retval_unpriv = 2, - }, - { - "PTR_TO_STACK check high 1", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, - }, - { - "PTR_TO_STACK check high 2", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, - }, - { - "PTR_TO_STACK check high 3", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), - BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .result_unpriv = REJECT, - .result = ACCEPT, - .retval = 42, - }, - { - "PTR_TO_STACK check high 4", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "invalid stack off=0 size=1", - .result = REJECT, - }, - { - "PTR_TO_STACK check high 5", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid stack off", - }, - { - "PTR_TO_STACK check high 6", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), - BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid stack off", - }, - { - "PTR_TO_STACK check high 7", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), - BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "fp pointer offset", - }, - { - "PTR_TO_STACK check low 1", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -512), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, - }, - { - "PTR_TO_STACK check low 2", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513), - BPF_ST_MEM(BPF_B, BPF_REG_1, 1, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 1), - BPF_EXIT_INSN(), - }, - .result_unpriv = REJECT, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .result = ACCEPT, - .retval = 42, - }, - { - "PTR_TO_STACK check low 3", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "invalid stack off=-513 size=1", - .result = REJECT, - }, - { - "PTR_TO_STACK check low 4", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, INT_MIN), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "math between fp pointer", - }, - { - "PTR_TO_STACK check low 5", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid stack off", - }, - { - "PTR_TO_STACK check low 6", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), - BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid stack off", - }, - { - "PTR_TO_STACK check low 7", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), - BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .errstr = "fp pointer offset", - }, - { - "PTR_TO_STACK mixed reg/k, 1", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), - BPF_MOV64_IMM(BPF_REG_2, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, - }, - { - "PTR_TO_STACK mixed reg/k, 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), - BPF_MOV64_IMM(BPF_REG_2, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_5, -6), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 42, - }, - { - "PTR_TO_STACK mixed reg/k, 3", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), - BPF_MOV64_IMM(BPF_REG_2, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = -3, - }, - { - "PTR_TO_STACK reg", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_MOV64_IMM(BPF_REG_2, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .result_unpriv = REJECT, - .errstr_unpriv = "invalid stack off=0 size=1", - .result = ACCEPT, - .retval = 42, - }, - { - "stack pointer arithmetic", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 4), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1), - BPF_ST_MEM(0, BPF_REG_2, 4, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), - BPF_ST_MEM(0, BPF_REG_2, 4, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "raw_stack: no skb_load_bytes", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - /* Call to skb_load_bytes() omitted. */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid read from stack off -8+0 size 8", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, negative len", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R4 min value is negative", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, negative len 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, ~0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R4 min value is negative", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, zero len", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid stack type R3", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, no init", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, init", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xcafe), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, spilled regs around bounds", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, - offsetof(struct __sk_buff, mark)), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, - offsetof(struct __sk_buff, priority)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, spilled regs corruption", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R0 invalid mem access 'inv'", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "raw_stack: skb_load_bytes, spilled regs corruption 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, - offsetof(struct __sk_buff, mark)), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, - offsetof(struct __sk_buff, priority)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_3, - offsetof(struct __sk_buff, pkt_type)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R3 invalid mem access 'inv'", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "raw_stack: skb_load_bytes, spilled regs + data", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, - offsetof(struct __sk_buff, mark)), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, - offsetof(struct __sk_buff, priority)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, invalid access 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -513), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid stack type R3 off=-513 access_size=8", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, invalid access 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid stack type R3 off=-1 access_size=8", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, invalid access 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 0xffffffff), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R4 min value is negative", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, invalid access 4", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, invalid access 5", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, invalid access 6", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid stack type R3 off=-512 access_size=0", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "raw_stack: skb_load_bytes, large access", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_4, 512), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "context stores via ST", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), - BPF_EXIT_INSN(), - }, - .errstr = "BPF_ST stores into R1 ctx is not allowed", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "context stores via XADD", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1, - BPF_REG_0, offsetof(struct __sk_buff, mark), 0), - BPF_EXIT_INSN(), - }, - .errstr = "BPF_XADD stores into R1 ctx is not allowed", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), - BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 15), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 7), - BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_3, 12), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 14), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, len)), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49), - BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_3, 4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test3", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access off=76", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - }, - { - "direct packet access: test4 (write)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test5 (pkt_end >= reg, good access)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test6 (pkt_end >= reg, bad access)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid access to packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test7 (pkt_end >= reg, both accesses)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid access to packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test8 (double test, variant 1)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test9 (double test, variant 2)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test10 (write invalid)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid access to packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test11 (shift, good access)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8), - BPF_MOV64_IMM(BPF_REG_3, 144), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 3), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = 1, - }, - { - "direct packet access: test12 (and, good access)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8), - BPF_MOV64_IMM(BPF_REG_3, 144), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23), - BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = 1, - }, - { - "direct packet access: test13 (branches, good access)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 13), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_MOV64_IMM(BPF_REG_4, 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 2), - BPF_MOV64_IMM(BPF_REG_3, 14), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_MOV64_IMM(BPF_REG_3, 24), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23), - BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = 1, - }, - { - "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7), - BPF_MOV64_IMM(BPF_REG_5, 12), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 4), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = 1, - }, - { - "direct packet access: test15 (spill with xadd)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8), - BPF_MOV64_IMM(BPF_REG_5, 4096), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), - BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R2 invalid mem access 'inv'", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "direct packet access: test16 (arith on data_end)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 16), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R3 pointer arithmetic on pkt_end", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test17 (pruning, alignment)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14), - BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), - BPF_JMP_A(-6), - }, - .errstr = "misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, - }, - { - "direct packet access: test18 (imm += pkt_ptr, 1)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_IMM(BPF_REG_0, 8), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test19 (imm += pkt_ptr, 2)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), - BPF_MOV64_IMM(BPF_REG_4, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), - BPF_STX_MEM(BPF_B, BPF_REG_4, BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test20 (x += pkt_ptr, 1)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0x7fff), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "direct packet access: test21 (x += pkt_ptr, 2)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9), - BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0x7fff), - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "direct packet access: test22 (x += pkt_ptr, 3)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_3, -16), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), - BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49), - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_STX_MEM(BPF_H, BPF_REG_4, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "direct packet access: test23 (x += pkt_ptr, 4)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_0, 31), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = REJECT, - .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)", - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "direct packet access: test24 (x += pkt_ptr, 5)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_0, 64), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7fff - 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "direct packet access: test25 (marking on <, good access)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, -4), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test26 (marking on <, bad access)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 3), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JA, 0, 0, -3), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "direct packet access: test27 (marking on <=, good access)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = 1, - }, - { - "direct packet access: test28 (marking on <=, bad access)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, -4), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test1, valid packet_ptr range", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_update_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .result_unpriv = ACCEPT, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "helper access to packet: test2, unchecked packet_ptr", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1 }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "helper access to packet: test3, variable add", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10), - BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 11 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "helper access to packet: test4, packet_ptr with bad range", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 7 }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "helper access to packet: test5, packet_ptr with too short range", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 6 }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "helper access to packet: test6, cls valid packet_ptr range", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_update_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test7, cls unchecked packet_ptr", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1 }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test8, cls variable add", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10), - BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 11 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test9, cls packet_ptr with bad range", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 7 }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test10, cls packet_ptr with too short range", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 6 }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test11, cls unsuitable helper 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_4, 42), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_store_bytes), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "helper access to the packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test12, cls unsuitable helper 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_4, 4), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "helper access to the packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test13, cls helper ok", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test14, cls helper ok sub", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test15, cls helper fail sub", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test16, cls helper fail range 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test17, cls helper fail range 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, -9), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R2 min value is negative", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test18, cls helper fail range 3", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, ~0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R2 min value is negative", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test19, cls helper range zero", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test20, pkt end as input", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R1 type=pkt_end expected=fp", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to packet: test21, wrong reg", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_diff), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "prevent map lookup in sockmap", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_sockmap = { 3 }, - .result = REJECT, - .errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem", - .prog_type = BPF_PROG_TYPE_SOCK_OPS, - }, - { - "prevent map lookup in sockhash", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_sockhash = { 3 }, - .result = REJECT, - .errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem", - .prog_type = BPF_PROG_TYPE_SOCK_OPS, - }, - { - "prevent map lookup in xskmap", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_xskmap = { 3 }, - .result = REJECT, - .errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem", - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "prevent map lookup in stack trace", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_stacktrace = { 3 }, - .result = REJECT, - .errstr = "cannot pass map_type 7 into func bpf_map_lookup_elem", - .prog_type = BPF_PROG_TYPE_PERF_EVENT, - }, - { - "prevent map lookup in prog array", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_prog2 = { 3 }, - .result = REJECT, - .errstr = "cannot pass map_type 3 into func bpf_map_lookup_elem", - }, - { - "valid map access into an array with a constant", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "valid map access into an array with a register", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_IMM(BPF_REG_1, 4), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result_unpriv = REJECT, - .result = ACCEPT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "valid map access into an array with a variable", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result_unpriv = REJECT, - .result = ACCEPT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "valid map access into an array with a signed variable", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), - BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result_unpriv = REJECT, - .result = ACCEPT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "invalid map access into an array with a constant", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=48 size=8", - .result = REJECT, - }, - { - "invalid map access into an array with a register", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 min value is outside of the array range", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "invalid map access into an array with a variable", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "invalid map access into an array with no floor check", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), - BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .errstr = "R0 unbounded memory access", - .result_unpriv = REJECT, - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "invalid map access into an array with a invalid max check", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .errstr = "invalid access to map value, value_size=48 off=44 size=8", - .result_unpriv = REJECT, - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "invalid map access into an array with a invalid max check", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3, 11 }, - .errstr = "R0 pointer += pointer", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "direct packet read test#1 for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct __sk_buff, len)), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, pkt_type)), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, - offsetof(struct __sk_buff, mark)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, queue_mapping)), - BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, - offsetof(struct __sk_buff, protocol)), - BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, - offsetof(struct __sk_buff, vlan_present)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "invalid bpf_context access off=76 size=4", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "direct packet read test#2 for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct __sk_buff, vlan_tci)), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, vlan_proto)), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, priority)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, - offsetof(struct __sk_buff, priority)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, - ingress_ifindex)), - BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, - offsetof(struct __sk_buff, tc_index)), - BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, - offsetof(struct __sk_buff, hash)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "direct packet read test#3 for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct __sk_buff, cb[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, cb[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, cb[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, cb[3])), - BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, - offsetof(struct __sk_buff, cb[4])), - BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, - offsetof(struct __sk_buff, napi_id)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4, - offsetof(struct __sk_buff, cb[0])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5, - offsetof(struct __sk_buff, cb[1])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, - offsetof(struct __sk_buff, cb[2])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, - offsetof(struct __sk_buff, cb[3])), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8, - offsetof(struct __sk_buff, cb[4])), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "direct packet read test#4 for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, family)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip4)), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct __sk_buff, local_ip4)), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, - offsetof(struct __sk_buff, remote_ip6[3])), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6[0])), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6[1])), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6[2])), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, local_ip6[3])), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct __sk_buff, remote_port)), - BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, - offsetof(struct __sk_buff, local_port)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid access of tc_classid for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tc_classid)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid access of data_meta for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, data_meta)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid access of flow_keys for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, flow_keys)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid write access to napi_id for CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, - offsetof(struct __sk_buff, napi_id)), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9, - offsetof(struct __sk_buff, napi_id)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "valid cgroup storage access", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_cgroup_storage = { 1 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid cgroup storage access 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1 }, - .result = REJECT, - .errstr = "cannot pass map_type 1 into func bpf_get_local_storage", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid cgroup storage access 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "fd 1 is not pointing to valid bpf_map", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid cgroup storage access 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=64 off=256 size=4", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid cgroup storage access 4", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_EXIT_INSN(), - }, - .fixup_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=64 off=-2 size=4", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "invalid cgroup storage access 5", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 7), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "get_local_storage() doesn't support non-zero flags", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid cgroup storage access 6", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "get_local_storage() doesn't support non-zero flags", - .errstr_unpriv = "R2 leaks addr into helper function", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "valid per-cpu cgroup storage access", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_percpu_cgroup_storage = { 1 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid per-cpu cgroup storage access 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1 }, - .result = REJECT, - .errstr = "cannot pass map_type 1 into func bpf_get_local_storage", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid per-cpu cgroup storage access 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "fd 1 is not pointing to valid bpf_map", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid per-cpu cgroup storage access 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_percpu_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=64 off=256 size=4", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid per-cpu cgroup storage access 4", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_EXIT_INSN(), - }, - .fixup_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=64 off=-2 size=4", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "invalid per-cpu cgroup storage access 5", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 7), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_percpu_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "get_local_storage() doesn't support non-zero flags", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "invalid per-cpu cgroup storage access 6", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_percpu_cgroup_storage = { 1 }, - .result = REJECT, - .errstr = "get_local_storage() doesn't support non-zero flags", - .errstr_unpriv = "R2 leaks addr into helper function", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "write tstamp from CGROUP_SKB", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, tstamp)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "invalid bpf_context access off=152 size=8", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "read tstamp from CGROUP_SKB", - .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tstamp)), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - }, - { - "multiple registers share map_lookup_elem result", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS - }, - { - "alu ops on ptr_to_map_value_or_null, 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .errstr = "R4 pointer arithmetic on map_value_or_null", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS - }, - { - "alu ops on ptr_to_map_value_or_null, 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .errstr = "R4 pointer arithmetic on map_value_or_null", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS - }, - { - "alu ops on ptr_to_map_value_or_null, 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .errstr = "R4 pointer arithmetic on map_value_or_null", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS - }, - { - "invalid memory access with multiple map_lookup_elem calls", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .result = REJECT, - .errstr = "R4 !read_ok", - .prog_type = BPF_PROG_TYPE_SCHED_CLS - }, - { - "valid indirect map_lookup_elem access with 2nd lookup in branch", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_2, 10), - BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 3), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS - }, - { - "invalid map access from else condition", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES-1, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 unbounded memory access", - .result = REJECT, - .errstr_unpriv = "R0 leaks addr", - .result_unpriv = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "constant register |= constant should keep constant type", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48), - BPF_MOV64_IMM(BPF_REG_2, 34), - BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "constant register |= constant should not bypass stack boundary checks", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48), - BPF_MOV64_IMM(BPF_REG_2, 34), - BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .errstr = "invalid stack type R1 off=-48 access_size=58", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "constant register |= constant register should keep constant type", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48), - BPF_MOV64_IMM(BPF_REG_2, 34), - BPF_MOV64_IMM(BPF_REG_4, 13), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "constant register |= constant register should not bypass stack boundary checks", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48), - BPF_MOV64_IMM(BPF_REG_2, 34), - BPF_MOV64_IMM(BPF_REG_4, 24), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .errstr = "invalid stack type R1 off=-48 access_size=58", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "invalid direct packet write for LWT_IN", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "cannot write into packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, - }, - { - "invalid direct packet write for LWT_OUT", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "cannot write into packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_OUT, - }, - { - "direct packet write for LWT_XMIT", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_LWT_XMIT, - }, - { - "direct packet read for LWT_IN", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_LWT_IN, - }, - { - "direct packet read for LWT_OUT", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_LWT_OUT, - }, - { - "direct packet read for LWT_XMIT", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_LWT_XMIT, - }, - { - "overlapping checks for direct packet access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_LWT_XMIT, - }, - { - "make headroom for LWT_XMIT", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_2, 34), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_skb_change_head), - /* split for s390 to succeed */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_2, 42), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_skb_change_head), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_LWT_XMIT, - }, - { - "invalid access of tc_classid for LWT_IN", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tc_classid)), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - }, - { - "invalid access of tc_classid for LWT_OUT", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tc_classid)), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - }, - { - "invalid access of tc_classid for LWT_XMIT", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tc_classid)), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - }, - { - "leak pointer into ctx 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0])), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2, - offsetof(struct __sk_buff, cb[0])), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 2 }, - .errstr_unpriv = "R2 leaks addr into mem", - .result_unpriv = REJECT, - .result = REJECT, - .errstr = "BPF_XADD stores into R1 ctx is not allowed", - }, - { - "leak pointer into ctx 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, - offsetof(struct __sk_buff, cb[0])), - BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10, - offsetof(struct __sk_buff, cb[0])), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "R10 leaks addr into mem", - .result_unpriv = REJECT, - .result = REJECT, - .errstr = "BPF_XADD stores into R1 ctx is not allowed", - }, - { - "leak pointer into ctx 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, - offsetof(struct __sk_buff, cb[0])), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1 }, - .errstr_unpriv = "R2 leaks addr into ctx", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "leak pointer into map val", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), - BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .errstr_unpriv = "R6 leaks addr into mem", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "helper access to map: full range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: partial range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: empty range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_EMIT_CALL(BPF_FUNC_trace_printk), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=0 size=0", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: out-of-bound range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=0 size=56", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: negative range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, -8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R2 min value is negative", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via const imm): full range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, - sizeof(struct test_val) - - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via const imm): partial range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via const imm): empty range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_EMIT_CALL(BPF_FUNC_trace_printk), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=4 size=0", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via const imm): out-of-bound range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, - sizeof(struct test_val) - - offsetof(struct test_val, foo) + 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=4 size=52", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via const imm): negative range (> adjustment)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, -8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R2 min value is negative", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via const imm): negative range (< adjustment)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R2 min value is negative", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via const reg): full range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, - offsetof(struct test_val, foo)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, - sizeof(struct test_val) - - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via const reg): partial range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, - offsetof(struct test_val, foo)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via const reg): empty range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_EMIT_CALL(BPF_FUNC_trace_printk), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R1 min value is outside of the array range", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via const reg): out-of-bound range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, - offsetof(struct test_val, foo)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, - sizeof(struct test_val) - - offsetof(struct test_val, foo) + 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=4 size=52", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via const reg): negative range (> adjustment)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, - offsetof(struct test_val, foo)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, -8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R2 min value is negative", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via const reg): negative range (< adjustment)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, - offsetof(struct test_val, foo)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R2 min value is negative", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via variable): full range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, - offsetof(struct test_val, foo), 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, - sizeof(struct test_val) - - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via variable): partial range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, - offsetof(struct test_val, foo), 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via variable): empty range", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, - offsetof(struct test_val, foo), 3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_EMIT_CALL(BPF_FUNC_trace_printk), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R1 min value is outside of the array range", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via variable): no max check", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R1 unbounded memory access", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to adjusted map (via variable): wrong max check", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, - offsetof(struct test_val, foo), 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, - sizeof(struct test_val) - - offsetof(struct test_val, foo) + 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=4 size=45", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: bounds check using <, good access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: bounds check using <, bad access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = REJECT, - .errstr = "R1 unbounded memory access", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: bounds check using <=, good access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: bounds check using <=, bad access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = REJECT, - .errstr = "R1 unbounded memory access", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: bounds check using s<, good access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 0, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: bounds check using s<, good access 2", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: bounds check using s<, bad access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = REJECT, - .errstr = "R1 min value is negative", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: bounds check using s<=, good access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: bounds check using s<=, good access 2", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to map: bounds check using s<=, bad access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = REJECT, - .errstr = "R1 min value is negative", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map access: known scalar += value_ptr from different maps", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, len)), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_1, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 5 }, - .fixup_map_array_48b = { 8 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R1 tried to add from different maps", - .retval = 1, - }, - { - "map access: value_ptr -= known scalar from different maps", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, len)), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_IMM(BPF_REG_1, 4), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 5 }, - .fixup_map_array_48b = { 8 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 min value is outside of the array range", - .retval = 1, - }, - { - "map access: known scalar += value_ptr from different maps, but same value properties", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, len)), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_1, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 5 }, - .fixup_map_array_48b = { 8 }, - .result = ACCEPT, - .retval = 1, - }, - { - "map access: mixing value pointer and scalar, 1", - .insns = { - // load map value pointer into r0 and r2 - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_LD_MAP_FD(BPF_REG_ARG1, 0), - BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - // load some number from the map into r1 - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - // depending on r1, branch: - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 3), - // branch A - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_JMP_A(2), - // branch B - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0x100000), - // common instruction - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - // depending on r1, branch: - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), - // branch A - BPF_JMP_A(4), - // branch B - BPF_MOV64_IMM(BPF_REG_0, 0x13371337), - // verifier follows fall-through - BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0x100000, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - // fake-dead code; targeted from branch A to - // prevent dead code sanitization - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 1 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different pointers or scalars", - .retval = 0, - }, - { - "map access: mixing value pointer and scalar, 2", - .insns = { - // load map value pointer into r0 and r2 - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_LD_MAP_FD(BPF_REG_ARG1, 0), - BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - // load some number from the map into r1 - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - // depending on r1, branch: - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), - // branch A - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0x100000), - BPF_JMP_A(2), - // branch B - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, 0), - // common instruction - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - // depending on r1, branch: - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), - // branch A - BPF_JMP_A(4), - // branch B - BPF_MOV64_IMM(BPF_REG_0, 0x13371337), - // verifier follows fall-through - BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0x100000, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - // fake-dead code; targeted from branch A to - // prevent dead code sanitization - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 1 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different maps or paths", - .retval = 0, - }, - { - "sanitation: alu with different scalars", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_LD_MAP_FD(BPF_REG_ARG1, 0), - BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16), - BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0x100000), - BPF_JMP_A(2), - BPF_MOV64_IMM(BPF_REG_2, 42), - BPF_MOV64_IMM(BPF_REG_3, 0x100001), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 1 }, - .result = ACCEPT, - .retval = 0x100000, - }, - { - "map access: value_ptr += known scalar, upper oob arith, test 1", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_IMM(BPF_REG_1, 48), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", - .retval = 1, - }, - { - "map access: value_ptr += known scalar, upper oob arith, test 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_IMM(BPF_REG_1, 49), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", - .retval = 1, - }, - { - "map access: value_ptr += known scalar, upper oob arith, test 3", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_IMM(BPF_REG_1, 47), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", - .retval = 1, - }, - { - "map access: value_ptr -= known scalar, lower oob arith, test 1", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_IMM(BPF_REG_1, 47), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, 48), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = REJECT, - .errstr = "R0 min value is outside of the array range", - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", - }, - { - "map access: value_ptr -= known scalar, lower oob arith, test 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_IMM(BPF_REG_1, 47), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, 48), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", - .retval = 1, - }, - { - "map access: value_ptr -= known scalar, lower oob arith, test 3", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_IMM(BPF_REG_1, 47), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, 47), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", - .retval = 1, - }, - { - "map access: known scalar += value_ptr", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_1, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .retval = 1, - }, - { - "map access: value_ptr += known scalar, 1", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_1, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .retval = 1, - }, - { - "map access: value_ptr += known scalar, 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_1, 49), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = REJECT, - .errstr = "invalid access to map value", - }, - { - "map access: value_ptr += known scalar, 3", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_1, -1), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = REJECT, - .errstr = "invalid access to map value", - }, - { - "map access: value_ptr += known scalar, 4", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_IMM(BPF_REG_1, 5), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, -2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, -1), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", - .retval = 1, - }, - { - "map access: value_ptr += known scalar, 5", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_1, (6 + 1) * sizeof(int)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .retval = 0xabcdef12, - }, - { - "map access: value_ptr += known scalar, 6", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_IMM(BPF_REG_1, (3 + 1) * sizeof(int)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, 3 * sizeof(int)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .retval = 0xabcdef12, - }, - { - "map access: unknown scalar += value_ptr, 1", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .retval = 1, - }, - { - "map access: unknown scalar += value_ptr, 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .retval = 0xabcdef12, - }, - { - "map access: unknown scalar += value_ptr, 3", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_MOV64_IMM(BPF_REG_1, -1), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", - .retval = 0xabcdef12, - }, - { - "map access: unknown scalar += value_ptr, 4", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_IMM(BPF_REG_1, 19), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = REJECT, - .errstr = "R1 max value is outside of the array range", - .errstr_unpriv = "R1 pointer arithmetic of map value goes out of range", - }, - { - "map access: value_ptr += unknown scalar, 1", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .retval = 1, - }, - { - "map access: value_ptr += unknown scalar, 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .retval = 0xabcdef12, - }, - { - "map access: value_ptr += unknown scalar, 3", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 8), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 16), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), - BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 1), - BPF_ALU64_IMM(BPF_OR, BPF_REG_3, 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_3, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_JMP_IMM(BPF_JA, 0, 0, -3), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .retval = 1, - }, - { - "map access: value_ptr += value_ptr", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_0), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = REJECT, - .errstr = "R0 pointer += pointer prohibited", - }, - { - "map access: known scalar -= value_ptr", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_1, 4), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = REJECT, - .errstr = "R1 tried to subtract pointer from scalar", - }, - { - "map access: value_ptr -= known scalar", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_MOV64_IMM(BPF_REG_1, 4), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = REJECT, - .errstr = "R0 min value is outside of the array range", - }, - { - "map access: value_ptr -= known scalar, 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_IMM(BPF_REG_1, 6), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", - .retval = 1, - }, - { - "map access: unknown scalar -= value_ptr", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = REJECT, - .errstr = "R1 tried to subtract pointer from scalar", - }, - { - "map access: value_ptr -= unknown scalar", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = REJECT, - .errstr = "R0 min value is negative", - }, - { - "map access: value_ptr -= unknown scalar, 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), - BPF_ALU64_IMM(BPF_OR, BPF_REG_1, 0x7), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", - .retval = 1, - }, - { - "map access: value_ptr -= value_ptr", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_0), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_map_array_48b = { 3 }, - .result = REJECT, - .errstr = "R0 invalid mem access 'inv'", - .errstr_unpriv = "R0 pointer -= pointer prohibited", - }, - { - "map lookup helper access to map", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 8 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map update helper access to map", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_update_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 10 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map update helper access to map: wrong size", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_update_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .fixup_map_hash_16b = { 10 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=8 off=0 size=16", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map helper access to adjusted map (via const imm)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, - offsetof(struct other_val, bar)), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 9 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map helper access to adjusted map (via const imm): out-of-bound 1", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, - sizeof(struct other_val) - 4), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 9 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=16 off=12 size=8", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map helper access to adjusted map (via const imm): out-of-bound 2", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 9 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=16 off=-4 size=8", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map helper access to adjusted map (via const reg)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, - offsetof(struct other_val, bar)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 10 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map helper access to adjusted map (via const reg): out-of-bound 1", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, - sizeof(struct other_val) - 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 10 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=16 off=12 size=8", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map helper access to adjusted map (via const reg): out-of-bound 2", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, -4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 10 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=16 off=-4 size=8", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map helper access to adjusted map (via variable)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, - offsetof(struct other_val, bar), 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 11 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map helper access to adjusted map (via variable): no max check", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 10 }, - .result = REJECT, - .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map helper access to adjusted map (via variable): wrong max check", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, - offsetof(struct other_val, bar) + 1, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_16b = { 3, 11 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=16 off=9 size=8", - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "map element value is preserved across register spilling", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result = ACCEPT, - .result_unpriv = REJECT, - }, - { - "map element value or null is marked on register spilling", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result = ACCEPT, - .result_unpriv = REJECT, - }, - { - "map element value store of cleared call register", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R1 !read_ok", - .errstr = "R1 !read_ok", - .result = REJECT, - .result_unpriv = REJECT, - }, - { - "map element value with unaligned store", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43), - BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), - BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32), - BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33), - BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5), - BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22), - BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23), - BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_8), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3), - BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22), - BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23), - BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result = ACCEPT, - .result_unpriv = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "map element value with unaligned load", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result = ACCEPT, - .result_unpriv = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "map element value illegal alu op, 1", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 bitwise operator &= on pointer", - .result = REJECT, - }, - { - "map element value illegal alu op, 2", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 32-bit pointer arithmetic prohibited", - .result = REJECT, - }, - { - "map element value illegal alu op, 3", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 pointer arithmetic with /= operator", - .result = REJECT, - }, - { - "map element value illegal alu op, 4", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", - .errstr = "invalid mem access 'inv'", - .result = REJECT, - .result_unpriv = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "map element value illegal alu op, 5", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_IMM(BPF_REG_3, 4096), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 invalid mem access 'inv'", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "map element value is preserved across register spilling", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, - offsetof(struct test_val, foo)), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .result = ACCEPT, - .result_unpriv = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "helper access to variable memory: stack, bitwise AND + JMP, correct bounds", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: stack, bitwise AND, zero included", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .errstr = "invalid indirect read from stack off -64+0 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: stack, bitwise AND + JMP, wrong max", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid stack type R1 off=-64 access_size=65", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: stack, JMP, correct bounds", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: stack, JMP (signed), correct bounds", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: stack, JMP, bounds + offset", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid stack type R1 off=-64 access_size=65", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: stack, JMP, wrong max", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid stack type R1 off=-64 access_size=65", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: stack, JMP, no max check", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - /* because max wasn't checked, signed min is negative */ - .errstr = "R2 min value is negative, either use unsigned or 'var &= const'", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: stack, JMP, no min check", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid indirect read from stack off -64+0 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: stack, JMP (signed), no min check", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_2, 16), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R2 min value is negative", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: map, JMP, correct bounds", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, - sizeof(struct test_val), 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: map, JMP, wrong max", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, - sizeof(struct test_val) + 1, 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=0 size=49", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: map adjusted, JMP, correct bounds", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20), - BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, - sizeof(struct test_val) - 20, 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: map adjusted, JMP, wrong max", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20), - BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, - sizeof(struct test_val) - 19, 4), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R1 min value is outside of the array range", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .errstr = "R1 type=inv expected=fp", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_csum_diff), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = 0 /* csum_diff of 64-byte packet */, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .errstr = "R1 type=inv expected=fp", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .errstr = "R1 type=inv expected=fp", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: 8 bytes leak", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_EXIT_INSN(), - }, - .errstr = "invalid indirect read from stack off -64+32 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "helper access to variable memory: 8 bytes no leak (init memory)", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "invalid and of negative number", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 max value is outside of the array range", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "invalid range check", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 12), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_9, 1), - BPF_ALU32_IMM(BPF_MOD, BPF_REG_1, 2), - BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1), - BPF_ALU32_REG(BPF_AND, BPF_REG_9, BPF_REG_1), - BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1), - BPF_ALU32_IMM(BPF_RSH, BPF_REG_9, 1), - BPF_MOV32_IMM(BPF_REG_3, 1), - BPF_ALU32_REG(BPF_SUB, BPF_REG_3, BPF_REG_9), - BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0x10000000), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), - BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), - BPF_MOV64_REG(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr = "R0 max value is outside of the array range", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "map in map access", - .insns = { - BPF_ST_MEM(0, BPF_REG_10, -4, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_ST_MEM(0, BPF_REG_10, -4, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_in_map = { 3 }, - .result = ACCEPT, - }, - { - "invalid inner map pointer", - .insns = { - BPF_ST_MEM(0, BPF_REG_10, -4, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_ST_MEM(0, BPF_REG_10, -4, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_in_map = { 3 }, - .errstr = "R1 pointer arithmetic on map_ptr prohibited", - .result = REJECT, - }, - { - "forgot null checking on the inner map pointer", - .insns = { - BPF_ST_MEM(0, BPF_REG_10, -4, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_ST_MEM(0, BPF_REG_10, -4, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_in_map = { 3 }, - .errstr = "R1 type=map_value_or_null expected=map_ptr", - .result = REJECT, - }, - { - "ld_abs: check calling conv, r1", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_LD_ABS(BPF_W, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr = "R1 !read_ok", - .result = REJECT, - }, - { - "ld_abs: check calling conv, r2", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_LD_ABS(BPF_W, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .errstr = "R2 !read_ok", - .result = REJECT, - }, - { - "ld_abs: check calling conv, r3", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_LD_ABS(BPF_W, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_EXIT_INSN(), - }, - .errstr = "R3 !read_ok", - .result = REJECT, - }, - { - "ld_abs: check calling conv, r4", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_LD_ABS(BPF_W, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), - BPF_EXIT_INSN(), - }, - .errstr = "R4 !read_ok", - .result = REJECT, - }, - { - "ld_abs: check calling conv, r5", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_LD_ABS(BPF_W, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), - BPF_EXIT_INSN(), - }, - .errstr = "R5 !read_ok", - .result = REJECT, - }, - { - "ld_abs: check calling conv, r7", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_7, 0), - BPF_LD_ABS(BPF_W, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "ld_abs: tests on r6 and skb data reload helper", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LD_ABS(BPF_B, 0), - BPF_LD_ABS(BPF_H, 0), - BPF_LD_ABS(BPF_W, 0), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_6, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_vlan_push), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), - BPF_LD_ABS(BPF_B, 0), - BPF_LD_ABS(BPF_H, 0), - BPF_LD_ABS(BPF_W, 0), - BPF_MOV64_IMM(BPF_REG_0, 42), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 42 /* ultimate return value */, - }, - { - "ld_ind: check calling conv, r1", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr = "R1 !read_ok", - .result = REJECT, - }, - { - "ld_ind: check calling conv, r2", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .errstr = "R2 !read_ok", - .result = REJECT, - }, - { - "ld_ind: check calling conv, r3", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_3, 1), - BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_EXIT_INSN(), - }, - .errstr = "R3 !read_ok", - .result = REJECT, - }, - { - "ld_ind: check calling conv, r4", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_4, 1), - BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), - BPF_EXIT_INSN(), - }, - .errstr = "R4 !read_ok", - .result = REJECT, - }, - { - "ld_ind: check calling conv, r5", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_5, 1), - BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), - BPF_EXIT_INSN(), - }, - .errstr = "R5 !read_ok", - .result = REJECT, - }, - { - "ld_ind: check calling conv, r7", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_IMM(BPF_REG_7, 1), - BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 1, - }, - { - "check bpf_perf_event_data->sample_period byte load permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_perf_event_data, sample_period)), -#else - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_perf_event_data, sample_period) + 7), -#endif - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_PERF_EVENT, - }, - { - "check bpf_perf_event_data->sample_period half load permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_perf_event_data, sample_period)), -#else - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_perf_event_data, sample_period) + 6), -#endif - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_PERF_EVENT, - }, - { - "check bpf_perf_event_data->sample_period word load permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_perf_event_data, sample_period)), -#else - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_perf_event_data, sample_period) + 4), -#endif - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_PERF_EVENT, - }, - { - "check bpf_perf_event_data->sample_period dword load permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, - offsetof(struct bpf_perf_event_data, sample_period)), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_PERF_EVENT, - }, - { - "check skb->data half load not permitted", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, data)), -#else - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, data) + 2), -#endif - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - }, - { - "check skb->tc_classid half load not permitted for lwt prog", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tc_classid)), -#else - BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, tc_classid) + 2), -#endif - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid bpf_context access", - .prog_type = BPF_PROG_TYPE_LWT_IN, - }, - { - "bounds checks mixing signed and unsigned, positive bounds", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, 2), - BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 4, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", - .result = REJECT, - }, - { - "bounds checks mixing signed and unsigned", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", - .result = REJECT, - }, - { - "bounds checks mixing signed and unsigned, variant 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5), - BPF_MOV64_IMM(BPF_REG_8, 0), - BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_1), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), - BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "unbounded min value", - .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", - .result = REJECT, - }, - { - "bounds checks mixing signed and unsigned, variant 3", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), - BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "unbounded min value", - .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", - .result = REJECT, - }, - { - "bounds checks mixing signed and unsigned, variant 4", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - }, - { - "bounds checks mixing signed and unsigned, variant 5", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 4), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", - .result = REJECT, - }, - { - "bounds checks mixing signed and unsigned, variant 6", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_6, -1), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_4, 1, 4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_ST_MEM(BPF_H, BPF_REG_10, -512, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_load_bytes), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R4 min value is negative, either use unsigned", - .result = REJECT, - }, - { - "bounds checks mixing signed and unsigned, variant 7", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - }, - { - "bounds checks mixing signed and unsigned, variant 8", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", - .result = REJECT, - }, - { - "bounds checks mixing signed and unsigned, variant 9", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - }, - { - "bounds checks mixing signed and unsigned, variant 10", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", - .result = REJECT, - }, - { - "bounds checks mixing signed and unsigned, variant 11", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), - /* Dead branch. */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", - .result = REJECT, - }, - { - "bounds checks mixing signed and unsigned, variant 12", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -6), - BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", - .result = REJECT, - }, - { - "bounds checks mixing signed and unsigned, variant 13", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, 2), - BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), - BPF_MOV64_IMM(BPF_REG_7, 1), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_1), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 4, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_7), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "unbounded min value", - .errstr_unpriv = "R7 has unknown scalar with mixed signed bounds", - .result = REJECT, - }, - { - "bounds checks mixing signed and unsigned, variant 14", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -1), - BPF_MOV64_IMM(BPF_REG_8, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 42, 6), - BPF_JMP_REG(BPF_JSGT, BPF_REG_8, BPF_REG_1, 3), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3), - BPF_JMP_IMM(BPF_JA, 0, 0, -7), - }, - .fixup_map_hash_8b = { 4 }, - .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", - .result = REJECT, - }, - { - "bounds checks mixing signed and unsigned, variant 15", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, -6), - BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 1, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "unbounded min value", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", - .result = REJECT, - .result_unpriv = REJECT, - }, - { - "subtraction bounds (map value) variant 1", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 7), - BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 5), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 56), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "R0 max value is outside of the array range", - .result = REJECT, - }, - { - "subtraction bounds (map value) variant 2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 6), - BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1), - BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 4), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", - .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", - .result = REJECT, - }, - { - "check subtraction on pointers for unpriv", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LD_MAP_FD(BPF_REG_ARG1, 0), - BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_ARG2, 0, 9), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_9, BPF_REG_FP), - BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_ARG1, 0), - BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_ARG2, 0, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 1, 9 }, - .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R9 pointer -= pointer prohibited", - }, - { - "bounds check based on zero-extended MOV", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - /* r2 = 0x0000'0000'ffff'ffff */ - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff), - /* r2 = 0 */ - BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32), - /* no-op */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - /* access at offset 0 */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - /* exit */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT - }, - { - "bounds check based on sign-extended MOV. test1", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - /* r2 = 0xffff'ffff'ffff'ffff */ - BPF_MOV64_IMM(BPF_REG_2, 0xffffffff), - /* r2 = 0xffff'ffff */ - BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32), - /* r0 = */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - /* access to OOB pointer */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - /* exit */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "map_value pointer and 4294967295", - .result = REJECT - }, - { - "bounds check based on sign-extended MOV. test2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - /* r2 = 0xffff'ffff'ffff'ffff */ - BPF_MOV64_IMM(BPF_REG_2, 0xffffffff), - /* r2 = 0xfff'ffff */ - BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36), - /* r0 = */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - /* access to OOB pointer */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - /* exit */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "R0 min value is outside of the array range", - .result = REJECT - }, - { - "bounds check based on reg_off + var_off + insn_off. test1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .errstr = "value_size=8 off=1073741825", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "bounds check based on reg_off + var_off + insn_off. test2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 4 }, - .errstr = "value 1073741823", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "bounds check after truncation of non-boundary-crossing range", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - /* r1 = [0x00, 0xff] */ - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_2, 1), - /* r2 = 0x10'0000'0000 */ - BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36), - /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), - /* r1 = [0x00, 0xff] */ - BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff), - /* r1 = 0 */ - BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), - /* no-op */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - /* access at offset 0 */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - /* exit */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT - }, - { - "bounds check after truncation of boundary-crossing range (1)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - /* r1 = [0x00, 0xff] */ - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), - /* r1 = [0xffff'ff80, 0x1'0000'007f] */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), - /* r1 = [0xffff'ff80, 0xffff'ffff] or - * [0x0000'0000, 0x0000'007f] - */ - BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), - /* r1 = [0x00, 0xff] or - * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] - */ - BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), - /* r1 = 0 or - * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] - */ - BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), - /* no-op or OOB pointer computation */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - /* potentially OOB access */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - /* exit */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - /* not actually fully unbounded, but the bound is very high */ - .errstr = "R0 unbounded memory access", - .result = REJECT - }, - { - "bounds check after truncation of boundary-crossing range (2)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), - /* r1 = [0x00, 0xff] */ - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), - /* r1 = [0xffff'ff80, 0x1'0000'007f] */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), - /* r1 = [0xffff'ff80, 0xffff'ffff] or - * [0x0000'0000, 0x0000'007f] - * difference to previous test: truncation via MOV32 - * instead of ALU32. - */ - BPF_MOV32_REG(BPF_REG_1, BPF_REG_1), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), - /* r1 = [0x00, 0xff] or - * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] - */ - BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), - /* r1 = 0 or - * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] - */ - BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), - /* no-op or OOB pointer computation */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - /* potentially OOB access */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - /* exit */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - /* not actually fully unbounded, but the bound is very high */ - .errstr = "R0 unbounded memory access", - .result = REJECT - }, - { - "bounds check after wrapping 32-bit addition", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - /* r1 = 0x7fff'ffff */ - BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff), - /* r1 = 0xffff'fffe */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), - /* r1 = 0 */ - BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2), - /* no-op */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - /* access at offset 0 */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - /* exit */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT - }, - { - "bounds check after shift with oversized count operand", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - BPF_MOV64_IMM(BPF_REG_2, 32), - BPF_MOV64_IMM(BPF_REG_1, 1), - /* r1 = (u32)1 << (u32)32 = ? */ - BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2), - /* r1 = [0x0000, 0xffff] */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff), - /* computes unknown pointer, potentially OOB */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - /* potentially OOB access */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - /* exit */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "R0 max value is outside of the array range", - .result = REJECT - }, - { - "bounds check after right shift of maybe-negative number", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - /* r1 = [0x00, 0xff] */ - BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - /* r1 = [-0x01, 0xfe] */ - BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1), - /* r1 = 0 or 0xff'ffff'ffff'ffff */ - BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), - /* r1 = 0 or 0xffff'ffff'ffff */ - BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), - /* computes unknown pointer, potentially OOB */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - /* potentially OOB access */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - /* exit */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "R0 unbounded memory access", - .result = REJECT - }, - { - "bounds check after 32-bit right shift with 64-bit input", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), - /* r1 = 2 */ - BPF_MOV64_IMM(BPF_REG_1, 2), - /* r1 = 1<<32 */ - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 31), - /* r1 = 0 (NOT 2!) */ - BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 31), - /* r1 = 0xffff'fffe (NOT 0!) */ - BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 2), - /* computes OOB pointer */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - /* OOB access */ - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - /* exit */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "R0 invalid mem access", - .result = REJECT, - }, - { - "bounds check map access with off+size signed 32bit overflow. test1", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), - BPF_JMP_A(0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "map_value pointer and 2147483646", - .result = REJECT - }, - { - "bounds check map access with off+size signed 32bit overflow. test2", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), - BPF_JMP_A(0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "pointer offset 1073741822", - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", - .result = REJECT - }, - { - "bounds check map access with off+size signed 32bit overflow. test3", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2), - BPF_JMP_A(0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "pointer offset -1073741822", - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", - .result = REJECT - }, - { - "bounds check map access with off+size signed 32bit overflow. test4", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_1, 1000000), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2), - BPF_JMP_A(0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "map_value pointer and 1000000000000", - .result = REJECT - }, - { - "pointer/scalar confusion in state equality check (way 1)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), - BPF_JMP_A(1), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), - BPF_JMP_A(0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .retval = POINTER_VALUE, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 leaks addr as return value" - }, - { - "pointer/scalar confusion in state equality check (way 2)", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), - BPF_JMP_A(1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = ACCEPT, - .retval = POINTER_VALUE, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 leaks addr as return value" - }, - { - "variable-offset ctx access", - .insns = { - /* Get an unknown value */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - /* add it to skb. We now have either &skb->len or - * &skb->pkt_type, but we don't know which - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - /* dereference it */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .errstr = "variable ctx access var_off=(0x0; 0x4)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, - }, - { - "variable-offset stack access", - .insns = { - /* Fill the top 8 bytes of the stack */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), - /* add it to fp. We now have either fp-4 or fp-8, but - * we don't know which - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* dereference it */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), - BPF_EXIT_INSN(), - }, - .errstr = "variable stack access var_off=(0xfffffffffffffff8; 0x4)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, - }, - { - "indirect variable-offset stack access", - .insns = { - /* Fill the top 8 bytes of the stack */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), - /* add it to fp. We now have either fp-4 or fp-8, but - * we don't know which - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* dereference it indirectly */ - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "variable stack read R2", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, - }, - { - "direct stack access with 32-bit wraparound. test1", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_EXIT_INSN() - }, - .errstr = "fp pointer and 2147483647", - .result = REJECT - }, - { - "direct stack access with 32-bit wraparound. test2", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_EXIT_INSN() - }, - .errstr = "fp pointer and 1073741823", - .result = REJECT - }, - { - "direct stack access with 32-bit wraparound. test3", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), - BPF_EXIT_INSN() - }, - .errstr = "fp pointer offset 1073741822", - .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", - .result = REJECT - }, - { - "liveness pruning and write screening", - .insns = { - /* Get an unknown value */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* branch conditions teach us nothing about R2 */ - BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R0 !read_ok", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, - }, - { - "varlen_map_value_access pruning", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), - BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), - BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, - offsetof(struct test_val, foo)), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 3 }, - .errstr_unpriv = "R0 leaks addr", - .errstr = "R0 unbounded memory access", - .result_unpriv = REJECT, - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "invalid 64-bit BPF_END", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 0), - { - .code = BPF_ALU64 | BPF_END | BPF_TO_LE, - .dst_reg = BPF_REG_0, - .src_reg = 0, - .off = 0, - .imm = 32, - }, - BPF_EXIT_INSN(), - }, - .errstr = "unknown opcode d7", - .result = REJECT, - }, - { - "XDP, using ifindex from netdev", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, ingress_ifindex)), - BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .retval = 1, - }, - { - "meta access, test1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "meta access, test2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet, off=-8", - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "meta access, test3", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "meta access, test4", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "meta access, test5", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3), - BPF_MOV64_IMM(BPF_REG_2, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_xdp_adjust_meta), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R3 !read_ok", - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "meta access, test6", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "meta access, test7", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "meta access, test8", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "meta access, test9", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "meta access, test10", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_IMM(BPF_REG_5, 42), - BPF_MOV64_IMM(BPF_REG_6, 24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), - BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), - BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid access to packet", - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "meta access, test11", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_IMM(BPF_REG_5, 42), - BPF_MOV64_IMM(BPF_REG_6, 24), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), - BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "meta access, test12", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16), - BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16), - BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "arithmetic ops make PTR_TO_CTX unusable", - .insns = { - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, - offsetof(struct __sk_buff, data) - - offsetof(struct __sk_buff, mark)), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .errstr = "dereference of modified ctx ptr", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "pkt_end - pkt_start is allowed", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = TEST_DATA_LEN, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "XDP pkt read, pkt_end mangling, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R3 pointer arithmetic on pkt_end", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "XDP pkt read, pkt_end mangling, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R3 pointer arithmetic on pkt_end", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "XDP pkt read, pkt_data' > pkt_end, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data' > pkt_end, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data' > pkt_end, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_end > pkt_data', good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_end > pkt_data', bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_end > pkt_data', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data' < pkt_end, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data' < pkt_end, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data' < pkt_end, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_end < pkt_data', good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_end < pkt_data', bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_end < pkt_data', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data' >= pkt_end, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data' >= pkt_end, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data' >= pkt_end, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_end >= pkt_data', good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_end >= pkt_data', bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_end >= pkt_data', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data' <= pkt_end, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data' <= pkt_end, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data' <= pkt_end, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_end <= pkt_data', good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_end <= pkt_data', bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_end <= pkt_data', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_meta' > pkt_data, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_meta' > pkt_data, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_meta' > pkt_data, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data > pkt_meta', good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data > pkt_meta', bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data > pkt_meta', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_meta' < pkt_data, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_meta' < pkt_data, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_meta' < pkt_data, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data < pkt_meta', good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data < pkt_meta', bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data < pkt_meta', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_meta' >= pkt_data, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_meta' >= pkt_data, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_meta' >= pkt_data, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data >= pkt_meta', good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data >= pkt_meta', bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data >= pkt_meta', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_meta' <= pkt_data, good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_meta' <= pkt_data, bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_meta' <= pkt_data, bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data <= pkt_meta', good access", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data <= pkt_meta', bad access 1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "XDP pkt read, pkt_data <= pkt_meta', bad access 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data_meta)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R1 offset is outside of the packet", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "check deducing bounds from const, 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R0 tried to subtract pointer from scalar", - }, - { - "check deducing bounds from const, 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 1, - }, - { - "check deducing bounds from const, 3", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R0 tried to subtract pointer from scalar", - }, - { - "check deducing bounds from const, 4", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - }, - { - "check deducing bounds from const, 5", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R0 tried to subtract pointer from scalar", - }, - { - "check deducing bounds from const, 6", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R0 tried to subtract pointer from scalar", - }, - { - "check deducing bounds from const, 7", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, ~0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), - BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "dereference of modified ctx ptr", - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "check deducing bounds from const, 8", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, ~0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "dereference of modified ctx ptr", - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "check deducing bounds from const, 9", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R0 tried to subtract pointer from scalar", - }, - { - "check deducing bounds from const, 10", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), - /* Marks reg as unknown. */ - BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0), - BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "math between ctx pointer and register with unbounded min value is not allowed", - }, - { - "bpf_exit with invalid return code. test1", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .errstr = "R0 has value (0x0; 0xffffffff)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "bpf_exit with invalid return code. test2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "bpf_exit with invalid return code. test3", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3), - BPF_EXIT_INSN(), - }, - .errstr = "R0 has value (0x0; 0x3)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "bpf_exit with invalid return code. test4", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "bpf_exit with invalid return code. test5", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .errstr = "R0 has value (0x2; 0x0)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "bpf_exit with invalid return code. test6", - .insns = { - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr = "R0 is not a known value (ctx)", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "bpf_exit with invalid return code. test7", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4), - BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .errstr = "R0 has unknown scalar value", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, - }, - { - "calls: basic sanity", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .result = ACCEPT, - }, - { - "calls: not on unpriviledged", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", - .result_unpriv = REJECT, - .result = ACCEPT, - .retval = 1, - }, - { - "calls: div by 0 in subprog", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV32_IMM(BPF_REG_2, 0), - BPF_MOV32_IMM(BPF_REG_3, 1), - BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, - }, - { - "calls: multiple ret types in subprog 1", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_MOV32_IMM(BPF_REG_0, 42), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = REJECT, - .errstr = "R0 invalid mem access 'inv'", - }, - { - "calls: multiple ret types in subprog 2", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, - offsetof(struct __sk_buff, data)), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map_hash_8b = { 16 }, - .result = REJECT, - .errstr = "R0 min value is outside of the array range", - }, - { - "calls: overlapping caller/callee", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "last insn is not an exit or jmp", - .result = REJECT, - }, - { - "calls: wrong recursive calls", - .insns = { - BPF_JMP_IMM(BPF_JA, 0, 0, 4), - BPF_JMP_IMM(BPF_JA, 0, 0, 4), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "jump out of range", - .result = REJECT, - }, - { - "calls: wrong src reg", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "BPF_CALL uses reserved fields", - .result = REJECT, - }, - { - "calls: wrong off value", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "BPF_CALL uses reserved fields", - .result = REJECT, - }, - { - "calls: jump back loop", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge from insn 0 to 0", - .result = REJECT, - }, - { - "calls: conditional call", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "jump out of range", - .result = REJECT, - }, - { - "calls: conditional call 2", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 3), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .result = ACCEPT, - }, - { - "calls: conditional call 3", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_JMP_IMM(BPF_JA, 0, 0, 4), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, -6), - BPF_MOV64_IMM(BPF_REG_0, 3), - BPF_JMP_IMM(BPF_JA, 0, 0, -6), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge from insn", - .result = REJECT, - }, - { - "calls: conditional call 4", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, -5), - BPF_MOV64_IMM(BPF_REG_0, 3), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .result = ACCEPT, - }, - { - "calls: conditional call 5", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, -6), - BPF_MOV64_IMM(BPF_REG_0, 3), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge from insn", - .result = REJECT, - }, - { - "calls: conditional call 6", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge from insn", - .result = REJECT, - }, - { - "calls: using r0 returned by callee", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .result = ACCEPT, - }, - { - "calls: using uninit r0 from callee", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "!read_ok", - .result = REJECT, - }, - { - "calls: callee is using r1", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, len)), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_ACT, - .result = ACCEPT, - .retval = TEST_DATA_LEN, - }, - { - "calls: callee using args1", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "allowed for root only", - .result_unpriv = REJECT, - .result = ACCEPT, - .retval = POINTER_VALUE, - }, - { - "calls: callee using wrong args2", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "R2 !read_ok", - .result = REJECT, - }, - { - "calls: callee using two args", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, - offsetof(struct __sk_buff, len)), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, - offsetof(struct __sk_buff, len)), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "allowed for root only", - .result_unpriv = REJECT, - .result = ACCEPT, - .retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN, - }, - { - "calls: callee changing pkt pointers", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_6), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8), - BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - /* clear_all_pkt_pointers() has to walk all frames - * to make sure that pkt pointers in the caller - * are cleared when callee is calling a helper that - * adjusts packet size - */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_xdp_adjust_head), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "R6 invalid mem access 'inv'", - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "calls: two calls with args", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, len)), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = TEST_DATA_LEN + TEST_DATA_LEN, - }, - { - "calls: calls with stack arith", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), - BPF_MOV64_IMM(BPF_REG_0, 42), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 42, - }, - { - "calls: calls with misaligned stack access", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), - BPF_MOV64_IMM(BPF_REG_0, 42), - BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, - .errstr = "misaligned stack access", - .result = REJECT, - }, - { - "calls: calls control flow, jump test", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 42), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 43), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, -3), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 43, - }, - { - "calls: calls control flow, jump test 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 42), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 43), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "jump out of range from insn 1 to 4", - .result = REJECT, - }, - { - "calls: two calls with bad jump", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, len)), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "jump out of range from insn 11 to 9", - .result = REJECT, - }, - { - "calls: recursive call. test1", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge", - .result = REJECT, - }, - { - "calls: recursive call. test2", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge", - .result = REJECT, - }, - { - "calls: unreachable code", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "unreachable insn 6", - .result = REJECT, - }, - { - "calls: invalid call", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "invalid destination", - .result = REJECT, - }, - { - "calls: invalid call 2", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "invalid destination", - .result = REJECT, - }, - { - "calls: jumping across function bodies. test1", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "jump out of range", - .result = REJECT, - }, - { - "calls: jumping across function bodies. test2", - .insns = { - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "jump out of range", - .result = REJECT, - }, - { - "calls: call without exit", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "not an exit", - .result = REJECT, - }, - { - "calls: call into middle of ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_LD_IMM64(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "last insn", - .result = REJECT, - }, - { - "calls: call into middle of other call", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "last insn", - .result = REJECT, - }, - { - "calls: ld_abs with changing ctx data in callee", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LD_ABS(BPF_B, 0), - BPF_LD_ABS(BPF_H, 0), - BPF_LD_ABS(BPF_W, 0), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), - BPF_LD_ABS(BPF_B, 0), - BPF_LD_ABS(BPF_H, 0), - BPF_LD_ABS(BPF_W, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_vlan_push), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed", - .result = REJECT, - }, - { - "calls: two calls with bad fallthrough", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_0), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, len)), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "not an exit", - .result = REJECT, - }, - { - "calls: two calls with stack read", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_XDP, - .result = ACCEPT, - }, - { - "calls: two calls with stack write", - .insns = { - /* main prog */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_8), - /* write into stack frame of main prog */ - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 2 */ - /* read from stack frame of main prog */ - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_XDP, - .result = ACCEPT, - }, - { - "calls: stack overflow using two frames (pre-call access)", - .insns = { - /* prog 1 */ - BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - - /* prog 2 */ - BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_XDP, - .errstr = "combined stack size", - .result = REJECT, - }, - { - "calls: stack overflow using two frames (post-call access)", - .insns = { - /* prog 1 */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), - BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), - BPF_EXIT_INSN(), - - /* prog 2 */ - BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_XDP, - .errstr = "combined stack size", - .result = REJECT, - }, - { - "calls: stack depth check using three frames. test1", - .insns = { - /* main */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ - BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - /* A */ - BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), - BPF_EXIT_INSN(), - /* B */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ - BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_XDP, - /* stack_main=32, stack_A=256, stack_B=64 - * and max(main+A, main+A+B) < 512 - */ - .result = ACCEPT, - }, - { - "calls: stack depth check using three frames. test2", - .insns = { - /* main */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ - BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - /* A */ - BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), - BPF_EXIT_INSN(), - /* B */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ - BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_XDP, - /* stack_main=32, stack_A=64, stack_B=256 - * and max(main+A, main+A+B) < 512 - */ - .result = ACCEPT, - }, - { - "calls: stack depth check using three frames. test3", - .insns = { - /* main */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 8), /* call B */ - BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 0, 1), - BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - /* A */ - BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 10, 1), - BPF_EXIT_INSN(), - BPF_ST_MEM(BPF_B, BPF_REG_10, -224, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, -3), - /* B */ - BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 1), - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -6), /* call A */ - BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_XDP, - /* stack_main=64, stack_A=224, stack_B=256 - * and max(main+A, main+A+B) > 512 - */ - .errstr = "combined stack", - .result = REJECT, - }, - { - "calls: stack depth check using three frames. test4", - /* void main(void) { - * func1(0); - * func1(1); - * func2(1); - * } - * void func1(int alloc_or_recurse) { - * if (alloc_or_recurse) { - * frame_pointer[-300] = 1; - * } else { - * func2(alloc_or_recurse); - * } - * } - * void func2(int alloc_or_recurse) { - * if (alloc_or_recurse) { - * frame_pointer[-300] = 1; - * } - * } - */ - .insns = { - /* main */ - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 7), /* call B */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - /* A */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), - BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), - BPF_EXIT_INSN(), - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ - BPF_EXIT_INSN(), - /* B */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_XDP, - .result = REJECT, - .errstr = "combined stack", - }, - { - "calls: stack depth check using three frames. test5", - .insns = { - /* main */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */ - BPF_EXIT_INSN(), - /* A */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ - BPF_EXIT_INSN(), - /* B */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */ - BPF_EXIT_INSN(), - /* C */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */ - BPF_EXIT_INSN(), - /* D */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */ - BPF_EXIT_INSN(), - /* E */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */ - BPF_EXIT_INSN(), - /* F */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */ - BPF_EXIT_INSN(), - /* G */ - BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ - BPF_EXIT_INSN(), - /* H */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_XDP, - .errstr = "call stack", - .result = REJECT, - }, - { - "calls: spill into caller stack frame", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_XDP, - .errstr = "cannot spill", - .result = REJECT, - }, - { - "calls: write into caller stack frame", - .insns = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - BPF_EXIT_INSN(), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_XDP, - .result = ACCEPT, - .retval = 42, - }, - { - "calls: write into callee stack frame", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_XDP, - .errstr = "cannot return stack pointer", - .result = REJECT, - }, - { - "calls: two calls with stack write and void return", - .insns = { - /* main prog */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - - /* subprog 2 */ - /* write into stack frame of main prog */ - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), - BPF_EXIT_INSN(), /* void return */ - }, - .prog_type = BPF_PROG_TYPE_XDP, - .result = ACCEPT, - }, - { - "calls: ambiguous return value", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr_unpriv = "allowed for root only", - .result_unpriv = REJECT, - .errstr = "R0 !read_ok", - .result = REJECT, - }, - { - "calls: two calls that return map_value", - .insns = { - /* main prog */ - /* pass fp-16, fp-8 into a function */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), - - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - /* fetch secound map_value_ptr from the stack */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - /* call 3rd function twice */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - /* first time with fp-8 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - /* second time with fp-16 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - - /* subprog 2 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - /* lookup from map */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - /* write map_value_ptr into stack frame of main prog */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), /* return 0 */ - }, - .prog_type = BPF_PROG_TYPE_XDP, - .fixup_map_hash_8b = { 23 }, - .result = ACCEPT, - }, - { - "calls: two calls that return map_value with bool condition", - .insns = { - /* main prog */ - /* pass fp-16, fp-8 into a function */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - /* call 3rd function twice */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - /* first time with fp-8 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - /* second time with fp-16 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), - /* fetch secound map_value_ptr from the stack */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - BPF_EXIT_INSN(), - - /* subprog 2 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - /* lookup from map */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), /* return 0 */ - /* write map_value_ptr into stack frame of main prog */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), /* return 1 */ - }, - .prog_type = BPF_PROG_TYPE_XDP, - .fixup_map_hash_8b = { 23 }, - .result = ACCEPT, - }, - { - "calls: two calls that return map_value with incorrect bool check", - .insns = { - /* main prog */ - /* pass fp-16, fp-8 into a function */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - /* call 3rd function twice */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - /* first time with fp-8 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - /* second time with fp-16 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - /* fetch secound map_value_ptr from the stack */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - BPF_EXIT_INSN(), - - /* subprog 2 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - /* lookup from map */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), /* return 0 */ - /* write map_value_ptr into stack frame of main prog */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), /* return 1 */ - }, - .prog_type = BPF_PROG_TYPE_XDP, - .fixup_map_hash_8b = { 23 }, - .result = REJECT, - .errstr = "invalid read from stack off -16+0 size 8", - }, - { - "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", - .insns = { - /* main prog */ - /* pass fp-16, fp-8 into a function */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - /* 1st lookup from map */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_8, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - /* write map_value_ptr into stack frame of main prog at fp-8 */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_8, 1), - - /* 2nd lookup from map */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_9, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - /* write map_value_ptr into stack frame of main prog at fp-16 */ - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_9, 1), - - /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ - BPF_EXIT_INSN(), - - /* subprog 2 */ - /* if arg2 == 1 do *arg1 = 0 */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - - /* if arg4 == 1 do *arg3 = 0 */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map_hash_8b = { 12, 22 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=8 off=2 size=8", - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", - .insns = { - /* main prog */ - /* pass fp-16, fp-8 into a function */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - /* 1st lookup from map */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_8, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - /* write map_value_ptr into stack frame of main prog at fp-8 */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_8, 1), - - /* 2nd lookup from map */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_9, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - /* write map_value_ptr into stack frame of main prog at fp-16 */ - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_9, 1), - - /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ - BPF_EXIT_INSN(), - - /* subprog 2 */ - /* if arg2 == 1 do *arg1 = 0 */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - - /* if arg4 == 1 do *arg3 = 0 */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map_hash_8b = { 12, 22 }, - .result = ACCEPT, - }, - { - "calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3", - .insns = { - /* main prog */ - /* pass fp-16, fp-8 into a function */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - /* 1st lookup from map */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_8, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - /* write map_value_ptr into stack frame of main prog at fp-8 */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_8, 1), - - /* 2nd lookup from map */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_9, 0), // 26 - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - /* write map_value_ptr into stack frame of main prog at fp-16 */ - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_9, 1), - - /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30 - BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34 - BPF_JMP_IMM(BPF_JA, 0, 0, -30), - - /* subprog 2 */ - /* if arg2 == 1 do *arg1 = 0 */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - - /* if arg4 == 1 do *arg3 = 0 */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, -8), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map_hash_8b = { 12, 22 }, - .result = REJECT, - .errstr = "invalid access to map value, value_size=8 off=2 size=8", - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "calls: two calls that receive map_value_ptr_or_null via arg. test1", - .insns = { - /* main prog */ - /* pass fp-16, fp-8 into a function */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - /* 1st lookup from map */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_8, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_MOV64_IMM(BPF_REG_8, 1), - - /* 2nd lookup from map */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_9, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_MOV64_IMM(BPF_REG_9, 1), - - /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - - /* subprog 2 */ - /* if arg2 == 1 do *arg1 = 0 */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - - /* if arg4 == 1 do *arg3 = 0 */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map_hash_8b = { 12, 22 }, - .result = ACCEPT, - }, - { - "calls: two calls that receive map_value_ptr_or_null via arg. test2", - .insns = { - /* main prog */ - /* pass fp-16, fp-8 into a function */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), - /* 1st lookup from map */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_8, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_MOV64_IMM(BPF_REG_8, 1), - - /* 2nd lookup from map */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_9, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_MOV64_IMM(BPF_REG_9, 1), - - /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - - /* subprog 2 */ - /* if arg2 == 1 do *arg1 = 0 */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - - /* if arg4 == 0 do *arg3 = 0 */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2), - /* fetch map_value_ptr from the stack of this function */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), - /* write into map value */ - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map_hash_8b = { 12, 22 }, - .result = REJECT, - .errstr = "R0 invalid mem access 'inv'", - }, - { - "calls: pkt_ptr spill into caller stack", - .insns = { - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - /* spill unchecked pkt_ptr into stack of caller */ - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), - /* now the pkt range is verified, read pkt_ptr from stack */ - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), - /* write 4 bytes into packet */ - BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = POINTER_VALUE, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "calls: pkt_ptr spill into caller stack 2", - .insns = { - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - /* Marking is still kept, but not in all cases safe. */ - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - /* spill unchecked pkt_ptr into stack of caller */ - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), - /* now the pkt range is verified, read pkt_ptr from stack */ - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), - /* write 4 bytes into packet */ - BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "invalid access to packet", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "calls: pkt_ptr spill into caller stack 3", - .insns = { - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - /* Marking is still kept and safe here. */ - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - /* spill unchecked pkt_ptr into stack of caller */ - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), - BPF_MOV64_IMM(BPF_REG_5, 1), - /* now the pkt range is verified, read pkt_ptr from stack */ - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), - /* write 4 bytes into packet */ - BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "calls: pkt_ptr spill into caller stack 4", - .insns = { - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - /* Check marking propagated. */ - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - /* spill unchecked pkt_ptr into stack of caller */ - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_5, 1), - /* don't read back pkt_ptr from stack here */ - /* write 4 bytes into packet */ - BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "calls: pkt_ptr spill into caller stack 5", - .insns = { - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), - /* spill checked pkt_ptr into stack of caller */ - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_5, 1), - /* don't read back pkt_ptr from stack here */ - /* write 4 bytes into packet */ - BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "same insn cannot be used with different", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "calls: pkt_ptr spill into caller stack 6", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), - /* spill checked pkt_ptr into stack of caller */ - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_5, 1), - /* don't read back pkt_ptr from stack here */ - /* write 4 bytes into packet */ - BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "R4 invalid mem access", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "calls: pkt_ptr spill into caller stack 7", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), - /* spill checked pkt_ptr into stack of caller */ - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_5, 1), - /* don't read back pkt_ptr from stack here */ - /* write 4 bytes into packet */ - BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "R4 invalid mem access", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "calls: pkt_ptr spill into caller stack 8", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), - /* spill checked pkt_ptr into stack of caller */ - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_5, 1), - /* don't read back pkt_ptr from stack here */ - /* write 4 bytes into packet */ - BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "calls: pkt_ptr spill into caller stack 9", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), - BPF_EXIT_INSN(), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), - BPF_MOV64_IMM(BPF_REG_5, 0), - /* spill unchecked pkt_ptr into stack of caller */ - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_5, 1), - /* don't read back pkt_ptr from stack here */ - /* write 4 bytes into packet */ - BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "invalid access to packet", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "calls: caller stack init to zero or map_value_or_null", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), - /* fetch map_value_or_null or const_zero from stack */ - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - /* store into map_value */ - BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - /* if (ctx == 0) return; */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8), - /* else bpf_map_lookup() and *(fp - 8) = r0 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 13 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "calls: stack init to zero and pruning", - .insns = { - /* first make allocated_stack 16 byte */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), - /* now fork the execution such that the false branch - * of JGT insn will be verified second and it skisp zero - * init of fp-8 stack slot. If stack liveness marking - * is missing live_read marks from call map_lookup - * processing then pruning will incorrectly assume - * that fp-8 stack slot was unused in the fall-through - * branch and will accept the program incorrectly - */ - BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 6 }, - .errstr = "invalid indirect read from stack off -8+0 size 8", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, - }, - { - "calls: two calls returning different map pointers for lookup (hash, array)", - .insns = { - /* main prog */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), - BPF_CALL_REL(11), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_CALL_REL(12), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - /* subprog 1 */ - BPF_LD_MAP_FD(BPF_REG_0, 0), - BPF_EXIT_INSN(), - /* subprog 2 */ - BPF_LD_MAP_FD(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map_hash_48b = { 13 }, - .fixup_map_array_48b = { 16 }, - .result = ACCEPT, - .retval = 1, - }, - { - "calls: two calls returning different map pointers for lookup (hash, map in map)", - .insns = { - /* main prog */ - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), - BPF_CALL_REL(11), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_CALL_REL(12), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, - offsetof(struct test_val, foo)), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - /* subprog 1 */ - BPF_LD_MAP_FD(BPF_REG_0, 0), - BPF_EXIT_INSN(), - /* subprog 2 */ - BPF_LD_MAP_FD(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map_in_map = { 16 }, - .fixup_map_array_48b = { 13 }, - .result = REJECT, - .errstr = "R0 invalid mem access 'map_ptr'", - }, - { - "cond: two branches returning different map pointers for lookup (tail, tail)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 3), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 7), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_prog1 = { 5 }, - .fixup_prog2 = { 2 }, - .result_unpriv = REJECT, - .errstr_unpriv = "tail_call abusing map_ptr", - .result = ACCEPT, - .retval = 42, - }, - { - "cond: two branches returning same map pointers for lookup (tail, tail)", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, - offsetof(struct __sk_buff, mark)), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 3), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_JMP_IMM(BPF_JA, 0, 0, 2), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 7), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .fixup_prog2 = { 2, 5 }, - .result_unpriv = ACCEPT, - .result = ACCEPT, - .retval = 42, - }, - { - "search pruning: all branches should be verified (nop operation)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_A(1), - BPF_MOV64_IMM(BPF_REG_4, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2), - BPF_MOV64_IMM(BPF_REG_6, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "R6 invalid mem access 'inv'", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "search pruning: all branches should be verified (invalid stack access)", - .insns = { - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), - BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), - BPF_JMP_A(1), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24), - BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), - BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .errstr = "invalid read from stack off -16+0 size 8", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "jit: lsh, rsh, arsh by 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_IMM(BPF_REG_1, 0xff), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1), - BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1), - BPF_EXIT_INSN(), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1), - BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1), - BPF_EXIT_INSN(), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 2, - }, - { - "jit: mov32 for ldimm64, 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32), - BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL), - BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 2, - }, - { - "jit: mov32 for ldimm64, 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL), - BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL), - BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 2, - }, - { - "jit: various mul tests", - .insns = { - BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL), - BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), - BPF_LD_IMM64(BPF_REG_1, 0xefefefULL), - BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1), - BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), - BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1), - BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV32_REG(BPF_REG_2, BPF_REG_2), - BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), - BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1), - BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), - BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1), - BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL), - BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL), - BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL), - BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1), - BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 2, - }, - { - "xadd/w check unaligned stack", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "misaligned stack access off", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "xadd/w check unaligned map", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 3 }, - .result = REJECT, - .errstr = "misaligned value access off", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - }, - { - "xadd/w check unaligned pkt", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct xdp_md, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct xdp_md, data_end)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), - BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2), - BPF_MOV64_IMM(BPF_REG_0, 99), - BPF_JMP_IMM(BPF_JA, 0, 0, 6), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), - BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0), - BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1), - BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "BPF_XADD stores into R2 pkt is not allowed", - .prog_type = BPF_PROG_TYPE_XDP, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "xadd/w check whether src/dst got mangled, 1", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3), - BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 42), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = 3, - }, - { - "xadd/w check whether src/dst got mangled, 2", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8), - BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3), - BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_0, 42), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = 3, - }, - { - "bpf_get_stack return R0 within range", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)), - BPF_MOV64_IMM(BPF_REG_4, 256), - BPF_EMIT_CALL(BPF_FUNC_get_stack), - BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32), - BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16), - BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5), - BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_9), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_EMIT_CALL(BPF_FUNC_get_stack), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_48b = { 4 }, - .result = ACCEPT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, - }, - { - "ld_abs: invalid op 1", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LD_ABS(BPF_DW, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = REJECT, - .errstr = "unknown opcode", - }, - { - "ld_abs: invalid op 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_0, 256), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LD_IND(BPF_DW, BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = REJECT, - .errstr = "unknown opcode", - }, - { - "ld_abs: nmap reduced", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LD_ABS(BPF_H, 12), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 28), - BPF_LD_ABS(BPF_H, 12), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 26), - BPF_MOV32_IMM(BPF_REG_0, 18), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -64), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -64), - BPF_LD_IND(BPF_W, BPF_REG_7, 14), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -60), - BPF_MOV32_IMM(BPF_REG_0, 280971478), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -60), - BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 15), - BPF_LD_ABS(BPF_H, 12), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 13), - BPF_MOV32_IMM(BPF_REG_0, 22), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56), - BPF_LD_IND(BPF_H, BPF_REG_7, 14), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -52), - BPF_MOV32_IMM(BPF_REG_0, 17366), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -48), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -48), - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -52), - BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV32_IMM(BPF_REG_0, 256), - BPF_EXIT_INSN(), - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .data = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6, - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 256, - }, - { - "ld_abs: div + abs, test 1", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), - BPF_LD_ABS(BPF_B, 3), - BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2), - BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2), - BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0), - BPF_LD_ABS(BPF_B, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), - BPF_LD_IND(BPF_B, BPF_REG_8, -70), - BPF_EXIT_INSN(), - }, - .data = { - 10, 20, 30, 40, 50, - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 10, - }, - { - "ld_abs: div + abs, test 2", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), - BPF_LD_ABS(BPF_B, 3), - BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2), - BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2), - BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0), - BPF_LD_ABS(BPF_B, 128), - BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), - BPF_LD_IND(BPF_B, BPF_REG_8, -70), - BPF_EXIT_INSN(), - }, - .data = { - 10, 20, 30, 40, 50, - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, - }, - { - "ld_abs: div + abs, test 3", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), - BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0), - BPF_LD_ABS(BPF_B, 3), - BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7), - BPF_EXIT_INSN(), - }, - .data = { - 10, 20, 30, 40, 50, - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, - }, - { - "ld_abs: div + abs, test 4", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), - BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0), - BPF_LD_ABS(BPF_B, 256), - BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7), - BPF_EXIT_INSN(), - }, - .data = { - 10, 20, 30, 40, 50, - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0, - }, - { - "ld_abs: vlan + abs, test 1", - .insns = { }, - .data = { - 0x34, - }, - .fill_helper = bpf_fill_ld_abs_vlan_push_pop, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 0xbef, - }, - { - "ld_abs: vlan + abs, test 2", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LD_ABS(BPF_B, 0), - BPF_LD_ABS(BPF_H, 0), - BPF_LD_ABS(BPF_W, 0), - BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), - BPF_MOV64_IMM(BPF_REG_6, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_MOV64_IMM(BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 2), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_skb_vlan_push), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), - BPF_LD_ABS(BPF_B, 0), - BPF_LD_ABS(BPF_H, 0), - BPF_LD_ABS(BPF_W, 0), - BPF_MOV64_IMM(BPF_REG_0, 42), - BPF_EXIT_INSN(), - }, - .data = { - 0x34, - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 42, - }, - { - "ld_abs: jump around ld_abs", - .insns = { }, - .data = { - 10, 11, - }, - .fill_helper = bpf_fill_jump_around_ld_abs, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 10, - }, - { - "ld_dw: xor semi-random 64 bit imms, test 1", - .insns = { }, - .data = { }, - .fill_helper = bpf_fill_rand_ld_dw, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 4090, - }, - { - "ld_dw: xor semi-random 64 bit imms, test 2", - .insns = { }, - .data = { }, - .fill_helper = bpf_fill_rand_ld_dw, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 2047, - }, - { - "ld_dw: xor semi-random 64 bit imms, test 3", - .insns = { }, - .data = { }, - .fill_helper = bpf_fill_rand_ld_dw, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 511, - }, - { - "ld_dw: xor semi-random 64 bit imms, test 4", - .insns = { }, - .data = { }, - .fill_helper = bpf_fill_rand_ld_dw, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 5, - }, - { - "pass unmodified ctx pointer to helper", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_update), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "reference tracking: leak potential reference", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */ - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "Unreleased reference", - .result = REJECT, - }, - { - "reference tracking: leak potential reference on stack", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "Unreleased reference", - .result = REJECT, - }, - { - "reference tracking: leak potential reference on stack 2", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "Unreleased reference", - .result = REJECT, - }, - { - "reference tracking: zero potential reference", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */ - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "Unreleased reference", - .result = REJECT, - }, - { - "reference tracking: copy and zero potential references", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */ - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "Unreleased reference", - .result = REJECT, - }, - { - "reference tracking: release reference without check", - .insns = { - BPF_SK_LOOKUP, - /* reference in r0 may be NULL */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "type=sock_or_null expected=sock", - .result = REJECT, - }, - { - "reference tracking: release reference", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "reference tracking: release reference 2", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_EXIT_INSN(), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "reference tracking: release reference twice", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "type=inv expected=sock", - .result = REJECT, - }, - { - "reference tracking: release reference twice inside branch", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */ - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "type=inv expected=sock", - .result = REJECT, - }, - { - "reference tracking: alloc, check, free in one subbranch", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16), - /* if (offsetof(skb, mark) > data_len) exit; */ - BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, - offsetof(struct __sk_buff, mark)), - BPF_SK_LOOKUP, - BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */ - /* Leak reference in R0 */ - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "Unreleased reference", - .result = REJECT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "reference tracking: alloc, check, free in both subbranches", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16), - /* if (offsetof(skb, mark) > data_len) exit; */ - BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, - offsetof(struct __sk_buff, mark)), - BPF_SK_LOOKUP, - BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, - }, - { - "reference tracking in call: free reference in subprog", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "pass modified ctx pointer to helper, 1", - .insns = { - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_update), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = REJECT, - .errstr = "dereference of modified ctx ptr", - }, - { - "pass modified ctx pointer to helper, 2", - .insns = { - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_socket_cookie), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .result_unpriv = REJECT, - .result = REJECT, - .errstr_unpriv = "dereference of modified ctx ptr", - .errstr = "dereference of modified ctx ptr", - }, - { - "pass modified ctx pointer to helper, 3", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4), - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_csum_update), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = REJECT, - .errstr = "variable ctx access var_off=(0x0; 0x4)", - }, - { - "mov64 src == dst", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_2), - // Check bounds are OK - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "mov64 src != dst", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_3), - // Check bounds are OK - BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "allocated_stack", - .insns = { - BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), - BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, -8), - BPF_STX_MEM(BPF_B, BPF_REG_10, BPF_REG_7, -9), - BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_10, -9), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .result_unpriv = ACCEPT, - .insn_processed = 15, - }, - { - "masking, test out of bounds 1", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 5), - BPF_MOV32_IMM(BPF_REG_2, 5 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test out of bounds 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 1), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test out of bounds 3", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0xffffffff), - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test out of bounds 4", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0xffffffff), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test out of bounds 5", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test out of bounds 6", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test out of bounds 7", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 5), - BPF_MOV32_IMM(BPF_REG_2, 5 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test out of bounds 8", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test out of bounds 9", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0xffffffff), - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test out of bounds 10", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 0xffffffff), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test out of bounds 11", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test out of bounds 12", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, -1), - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test in bounds 1", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 4), - BPF_MOV32_IMM(BPF_REG_2, 5 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 4, - }, - { - "masking, test in bounds 2", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test in bounds 3", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0xfffffffe), - BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0xfffffffe, - }, - { - "masking, test in bounds 4", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0xabcde), - BPF_MOV32_IMM(BPF_REG_2, 0xabcdef - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0xabcde, - }, - { - "masking, test in bounds 5", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 0), - BPF_MOV32_IMM(BPF_REG_2, 1 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "masking, test in bounds 6", - .insns = { - BPF_MOV32_IMM(BPF_REG_1, 46), - BPF_MOV32_IMM(BPF_REG_2, 47 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 46, - }, - { - "masking, test in bounds 7", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, -46), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1), - BPF_MOV32_IMM(BPF_REG_2, 47 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 46, - }, - { - "masking, test in bounds 8", - .insns = { - BPF_MOV64_IMM(BPF_REG_3, -47), - BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1), - BPF_MOV32_IMM(BPF_REG_2, 47 - 1), - BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3), - BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3), - BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), - BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), - BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), - BPF_EXIT_INSN(), - }, - .result = ACCEPT, - .retval = 0, - }, - { - "reference tracking in call: free reference in subprog and outside", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "type=inv expected=sock", - .result = REJECT, - }, - { - "reference tracking in call: alloc & leak reference in subprog", - .insns = { - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_4), - BPF_SK_LOOKUP, - /* spill unchecked sk_ptr into stack of caller */ - BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "Unreleased reference", - .result = REJECT, - }, - { - "reference tracking in call: alloc in subprog, release outside", - .insns = { - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_SK_LOOKUP, - BPF_EXIT_INSN(), /* return sk */ - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .retval = POINTER_VALUE, - .result = ACCEPT, - }, - { - "reference tracking in call: sk_ptr leak into caller stack", - .insns = { - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), - /* spill unchecked sk_ptr into stack of caller */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 2 */ - BPF_SK_LOOKUP, - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "Unreleased reference", - .result = REJECT, - }, - { - "reference tracking in call: sk_ptr spill into caller stack", - .insns = { - BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - - /* subprog 1 */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), - /* spill unchecked sk_ptr into stack of caller */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0), - BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - /* now the sk_ptr is verified, free the reference */ - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_4, 0), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - - /* subprog 2 */ - BPF_SK_LOOKUP, - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "reference tracking: allow LD_ABS", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_LD_ABS(BPF_B, 0), - BPF_LD_ABS(BPF_H, 0), - BPF_LD_ABS(BPF_W, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "reference tracking: forbid LD_ABS while holding reference", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_SK_LOOKUP, - BPF_LD_ABS(BPF_B, 0), - BPF_LD_ABS(BPF_H, 0), - BPF_LD_ABS(BPF_W, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references", - .result = REJECT, - }, - { - "reference tracking: allow LD_IND", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_MOV64_IMM(BPF_REG_7, 1), - BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 1, - }, - { - "reference tracking: forbid LD_IND while holding reference", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_7, 1), - BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references", - .result = REJECT, - }, - { - "reference tracking: check reference or tail call", - .insns = { - BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), - BPF_SK_LOOKUP, - /* if (sk) bpf_sk_release() */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7), - /* bpf_tail_call() */ - BPF_MOV64_IMM(BPF_REG_3, 2), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .fixup_prog1 = { 17 }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "reference tracking: release reference then tail call", - .insns = { - BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), - BPF_SK_LOOKUP, - /* if (sk) bpf_sk_release() */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - /* bpf_tail_call() */ - BPF_MOV64_IMM(BPF_REG_3, 2), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_prog1 = { 18 }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "reference tracking: leak possible reference over tail call", - .insns = { - BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), - /* Look up socket and store in REG_6 */ - BPF_SK_LOOKUP, - /* bpf_tail_call() */ - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, 2), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 0), - /* if (sk) bpf_sk_release() */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .fixup_prog1 = { 16 }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "tail_call would lead to reference leak", - .result = REJECT, - }, - { - "reference tracking: leak checked reference over tail call", - .insns = { - BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), - /* Look up socket and store in REG_6 */ - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - /* if (!sk) goto end */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - /* bpf_tail_call() */ - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .fixup_prog1 = { 17 }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "tail_call would lead to reference leak", - .result = REJECT, - }, - { - "reference tracking: mangle and release sock_or_null", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "R1 pointer arithmetic on sock_or_null prohibited", - .result = REJECT, - }, - { - "reference tracking: mangle and release sock", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "R1 pointer arithmetic on sock prohibited", - .result = REJECT, - }, - { - "reference tracking: access member", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "reference tracking: write to member", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_LD_IMM64(BPF_REG_2, 42), - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2, - offsetof(struct bpf_sock, mark)), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_LD_IMM64(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "cannot write into socket", - .result = REJECT, - }, - { - "reference tracking: invalid 64-bit access of member", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "invalid bpf_sock access off=0 size=8", - .result = REJECT, - }, - { - "reference tracking: access after release", - .insns = { - BPF_SK_LOOKUP, - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "!read_ok", - .result = REJECT, - }, - { - "reference tracking: direct access for lookup", - .insns = { - /* Check that the packet is at least 64B long */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64), - BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9), - /* sk = sk_lookup_tcp(ctx, skb->data, ...) */ - BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), - BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp), - BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_EMIT_CALL(BPF_FUNC_sk_release), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "calls: ctx read at start of subprog", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), - BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_0, 0), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", - .result_unpriv = REJECT, - .result = ACCEPT, - }, - { - "check wire_len is not readable by sockets", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, wire_len)), - BPF_EXIT_INSN(), - }, - .errstr = "invalid bpf_context access", - .result = REJECT, - }, - { - "check wire_len is readable by tc classifier", - .insns = { - BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, - offsetof(struct __sk_buff, wire_len)), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - }, - { - "check wire_len is not writable by tc classifier", - .insns = { - BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, - offsetof(struct __sk_buff, wire_len)), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .errstr = "invalid bpf_context access", - .errstr_unpriv = "R1 leaks addr", - .result = REJECT, - }, - { - "calls: cross frame pruning", - .insns = { - /* r8 = !!random(); - * call pruner() - * if (r8) - * do something bad; - */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_prandom_u32), - BPF_MOV64_IMM(BPF_REG_8, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), - BPF_MOV64_IMM(BPF_REG_8, 1), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "function calls to other bpf functions are allowed for root only", - .errstr = "!read_ok", - .result = REJECT, - }, - { - "jset: functional", - .insns = { - /* r0 = 0 */ - BPF_MOV64_IMM(BPF_REG_0, 0), - /* prep for direct packet access via r2 */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_4, BPF_REG_3, 1), - BPF_EXIT_INSN(), - - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), - - /* reg, bit 63 or bit 0 set, taken */ - BPF_LD_IMM64(BPF_REG_8, 0x8000000000000001), - BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), - BPF_EXIT_INSN(), - - /* reg, bit 62, not taken */ - BPF_LD_IMM64(BPF_REG_8, 0x4000000000000000), - BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_EXIT_INSN(), - - /* imm, any bit set, taken */ - BPF_JMP_IMM(BPF_JSET, BPF_REG_7, -1, 1), - BPF_EXIT_INSN(), - - /* imm, bit 31 set, taken */ - BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x80000000, 1), - BPF_EXIT_INSN(), - - /* all good - return r0 == 2 */ - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .runs = 7, - .retvals = { - { .retval = 2, - .data64 = { (1ULL << 63) | (1U << 31) | (1U << 0), } - }, - { .retval = 2, - .data64 = { (1ULL << 63) | (1U << 31), } - }, - { .retval = 2, - .data64 = { (1ULL << 31) | (1U << 0), } - }, - { .retval = 2, - .data64 = { (__u32)-1, } - }, - { .retval = 2, - .data64 = { ~0x4000000000000000ULL, } - }, - { .retval = 0, - .data64 = { 0, } - }, - { .retval = 0, - .data64 = { ~0ULL, } - }, - }, - }, - { - "jset: sign-extend", - .insns = { - /* r0 = 0 */ - BPF_MOV64_IMM(BPF_REG_0, 0), - /* prep for direct packet access via r2 */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct __sk_buff, data)), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, - offsetof(struct __sk_buff, data_end)), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), - BPF_JMP_REG(BPF_JLE, BPF_REG_4, BPF_REG_3, 1), - BPF_EXIT_INSN(), - - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), - - BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x80000000, 1), - BPF_EXIT_INSN(), - - BPF_MOV64_IMM(BPF_REG_0, 2), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = ACCEPT, - .retval = 2, - .data = { 1, 0, 0, 0, 0, 0, 0, 1, }, - }, - { - "jset: known const compare", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .retval_unpriv = 1, - .result_unpriv = ACCEPT, - .retval = 1, - .result = ACCEPT, - }, - { - "jset: known const compare bad", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "!read_ok", - .result_unpriv = REJECT, - .errstr = "!read_ok", - .result = REJECT, - }, - { - "jset: unknown const compare taken", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_prandom_u32), - BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), - BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "!read_ok", - .result_unpriv = REJECT, - .errstr = "!read_ok", - .result = REJECT, - }, - { - "jset: unknown const compare not taken", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_prandom_u32), - BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "!read_ok", - .result_unpriv = REJECT, - .errstr = "!read_ok", - .result = REJECT, - }, - { - "jset: half-known const compare", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_prandom_u32), - BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2), - BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .result_unpriv = ACCEPT, - .result = ACCEPT, - }, - { - "jset: range", - .insns = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_get_prandom_u32), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff), - BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3), - BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1), - BPF_EXIT_INSN(), - BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1), - BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), - BPF_EXIT_INSN(), - }, - .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .result_unpriv = ACCEPT, - .result = ACCEPT, - }, +#define FILL_ARRAY +#include +#undef FILL_ARRAY }; static int probe_filter_length(const struct bpf_insn *fp) diff --git a/tools/testing/selftests/bpf/verifier/.gitignore b/tools/testing/selftests/bpf/verifier/.gitignore new file mode 100644 index 000000000000..45984a364647 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/.gitignore @@ -0,0 +1 @@ +tests.h diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c new file mode 100644 index 000000000000..e0fad1548737 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/and.c @@ -0,0 +1,50 @@ +{ + "invalid and of negative number", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R0 max value is outside of the array range", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "invalid range check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 12), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + BPF_ALU32_IMM(BPF_MOD, BPF_REG_1, 2), + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_ALU32_REG(BPF_AND, BPF_REG_9, BPF_REG_1), + BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1), + BPF_ALU32_IMM(BPF_RSH, BPF_REG_9, 1), + BPF_MOV32_IMM(BPF_REG_3, 1), + BPF_ALU32_REG(BPF_SUB, BPF_REG_3, BPF_REG_9), + BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0x10000000), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), + BPF_MOV64_REG(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R0 max value is outside of the array range", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c new file mode 100644 index 000000000000..0dcecaf3ec6f --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -0,0 +1,219 @@ +{ + "valid map access into an array with a constant", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "valid map access into an array with a register", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "valid map access into an array with a variable", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "valid map access into an array with a signed variable", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), + BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "invalid map access into an array with a constant", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2, + offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=48 size=8", + .result = REJECT, +}, +{ + "invalid map access into an array with a register", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R0 min value is outside of the array range", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "invalid map access into an array with a variable", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "invalid map access into an array with no floor check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), + BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .errstr = "R0 unbounded memory access", + .result_unpriv = REJECT, + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "invalid map access into an array with a invalid max check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .errstr = "invalid access to map value, value_size=48 off=44 size=8", + .result_unpriv = REJECT, + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "invalid map access into an array with a invalid max check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, + offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3, 11 }, + .errstr = "R0 pointer += pointer", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/bpf/verifier/basic.c b/tools/testing/selftests/bpf/verifier/basic.c new file mode 100644 index 000000000000..b8d18642653a --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/basic.c @@ -0,0 +1,23 @@ +{ + "empty prog", + .insns = { + }, + .errstr = "unknown opcode 00", + .result = REJECT, +}, +{ + "only exit insn", + .insns = { + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, +}, +{ + "no bpf_exit", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2), + }, + .errstr = "not an exit", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/basic_call.c b/tools/testing/selftests/bpf/verifier/basic_call.c new file mode 100644 index 000000000000..a8c6ab4c1622 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/basic_call.c @@ -0,0 +1,50 @@ +{ + "invalid call insn1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unknown opcode 8d", + .result = REJECT, +}, +{ + "invalid call insn2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 1, 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_CALL uses reserved", + .result = REJECT, +}, +{ + "invalid function call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 1234567), + BPF_EXIT_INSN(), + }, + .errstr = "invalid func unknown#1234567", + .result = REJECT, +}, +{ + "invalid argument register", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "non-invalid argument register", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, diff --git a/tools/testing/selftests/bpf/verifier/basic_instr.c b/tools/testing/selftests/bpf/verifier/basic_instr.c new file mode 100644 index 000000000000..ed91a7b9a456 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/basic_instr.c @@ -0,0 +1,134 @@ +{ + "add+sub+mul", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_2, 3), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 3), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = -3, +}, +{ + "xor32 zero extend check", + .insns = { + BPF_MOV32_IMM(BPF_REG_2, -1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32), + BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff), + BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, +}, +{ + "arsh32 on imm", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "arsh32 on imm 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, 0x1122334485667788), + BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = -16069393, +}, +{ + "arsh32 on reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "arsh32 on reg 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, 0xffff55667788), + BPF_MOV64_IMM(BPF_REG_1, 15), + BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 43724, +}, +{ + "arsh64 on imm", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_0, 5), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "arsh64 on reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "invalid 64-bit BPF_END", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 0), + { + .code = BPF_ALU64 | BPF_END | BPF_TO_LE, + .dst_reg = BPF_REG_0, + .src_reg = 0, + .off = 0, + .imm = 32, + }, + BPF_EXIT_INSN(), + }, + .errstr = "unknown opcode d7", + .result = REJECT, +}, +{ + "mov64 src == dst", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_2), + // Check bounds are OK + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "mov64 src != dst", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_3), + // Check bounds are OK + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/basic_stack.c b/tools/testing/selftests/bpf/verifier/basic_stack.c new file mode 100644 index 000000000000..b56f8117c09d --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/basic_stack.c @@ -0,0 +1,64 @@ +{ + "stack out of bounds", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack", + .result = REJECT, +}, +{ + "uninitialized stack1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 2 }, + .errstr = "invalid indirect read from stack", + .result = REJECT, +}, +{ + "uninitialized stack2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8), + BPF_EXIT_INSN(), + }, + .errstr = "invalid read from stack", + .result = REJECT, +}, +{ + "invalid fp arithmetic", + /* If this gets ever changed, make sure JITs can deal with it. */ + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 8), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 subtraction from stack pointer", + .result = REJECT, +}, +{ + "non-invalid fp arithmetic", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "misaligned read from stack", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned stack access", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/basic_stx_ldx.c b/tools/testing/selftests/bpf/verifier/basic_stx_ldx.c new file mode 100644 index 000000000000..7a0aab3f2cd2 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/basic_stx_ldx.c @@ -0,0 +1,45 @@ +{ + "invalid src register in STX", + .insns = { + BPF_STX_MEM(BPF_B, BPF_REG_10, -1, -1), + BPF_EXIT_INSN(), + }, + .errstr = "R15 is invalid", + .result = REJECT, +}, +{ + "invalid dst register in STX", + .insns = { + BPF_STX_MEM(BPF_B, 14, BPF_REG_10, -1), + BPF_EXIT_INSN(), + }, + .errstr = "R14 is invalid", + .result = REJECT, +}, +{ + "invalid dst register in ST", + .insns = { + BPF_ST_MEM(BPF_B, 14, -1, -1), + BPF_EXIT_INSN(), + }, + .errstr = "R14 is invalid", + .result = REJECT, +}, +{ + "invalid src register in LDX", + .insns = { + BPF_LDX_MEM(BPF_B, BPF_REG_0, 12, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R12 is invalid", + .result = REJECT, +}, +{ + "invalid dst register in LDX", + .insns = { + BPF_LDX_MEM(BPF_B, 11, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R11 is invalid", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c new file mode 100644 index 000000000000..d55f476f2237 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -0,0 +1,508 @@ +{ + "subtraction bounds (map value) variant 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 7), + BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 5), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 56), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "R0 max value is outside of the array range", + .result = REJECT, +}, +{ + "subtraction bounds (map value) variant 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 6), + BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", + .result = REJECT, +}, +{ + "check subtraction on pointers for unpriv", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_ARG2, 0, 9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_FP), + BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_ARG2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1, 9 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R9 pointer -= pointer prohibited", +}, +{ + "bounds check based on zero-extended MOV", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + /* r2 = 0x0000'0000'ffff'ffff */ + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff), + /* r2 = 0 */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32), + /* no-op */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + /* access at offset 0 */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT +}, +{ + "bounds check based on sign-extended MOV. test1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + /* r2 = 0xffff'ffff'ffff'ffff */ + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff), + /* r2 = 0xffff'ffff */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32), + /* r0 = */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + /* access to OOB pointer */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "map_value pointer and 4294967295", + .result = REJECT +}, +{ + "bounds check based on sign-extended MOV. test2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + /* r2 = 0xffff'ffff'ffff'ffff */ + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff), + /* r2 = 0xfff'ffff */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36), + /* r0 = */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + /* access to OOB pointer */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "R0 min value is outside of the array range", + .result = REJECT +}, +{ + "bounds check based on reg_off + var_off + insn_off. test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 4 }, + .errstr = "value_size=8 off=1073741825", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "bounds check based on reg_off + var_off + insn_off. test2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 4 }, + .errstr = "value 1073741823", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "bounds check after truncation of non-boundary-crossing range", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + /* r1 = [0x00, 0xff] */ + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), + /* r2 = 0x10'0000'0000 */ + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36), + /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), + /* r1 = [0x00, 0xff] */ + BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff), + /* r1 = 0 */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* no-op */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* access at offset 0 */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT +}, +{ + "bounds check after truncation of boundary-crossing range (1)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + /* r1 = [0x00, 0xff] */ + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0xffff'ff80, 0x1'0000'007f] */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0xffff'ff80, 0xffff'ffff] or + * [0x0000'0000, 0x0000'007f] + */ + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0x00, 0xff] or + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] + */ + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = 0 or + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] + */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* no-op or OOB pointer computation */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* potentially OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + /* not actually fully unbounded, but the bound is very high */ + .errstr = "R0 unbounded memory access", + .result = REJECT +}, +{ + "bounds check after truncation of boundary-crossing range (2)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + /* r1 = [0x00, 0xff] */ + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0xffff'ff80, 0x1'0000'007f] */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0xffff'ff80, 0xffff'ffff] or + * [0x0000'0000, 0x0000'007f] + * difference to previous test: truncation via MOV32 + * instead of ALU32. + */ + BPF_MOV32_REG(BPF_REG_1, BPF_REG_1), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = [0x00, 0xff] or + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] + */ + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), + /* r1 = 0 or + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] + */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* no-op or OOB pointer computation */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* potentially OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + /* not actually fully unbounded, but the bound is very high */ + .errstr = "R0 unbounded memory access", + .result = REJECT +}, +{ + "bounds check after wrapping 32-bit addition", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + /* r1 = 0x7fff'ffff */ + BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff), + /* r1 = 0xffff'fffe */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), + /* r1 = 0 */ + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2), + /* no-op */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* access at offset 0 */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT +}, +{ + "bounds check after shift with oversized count operand", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_IMM(BPF_REG_2, 32), + BPF_MOV64_IMM(BPF_REG_1, 1), + /* r1 = (u32)1 << (u32)32 = ? */ + BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2), + /* r1 = [0x0000, 0xffff] */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff), + /* computes unknown pointer, potentially OOB */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* potentially OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "R0 max value is outside of the array range", + .result = REJECT +}, +{ + "bounds check after right shift of maybe-negative number", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + /* r1 = [0x00, 0xff] */ + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + /* r1 = [-0x01, 0xfe] */ + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1), + /* r1 = 0 or 0xff'ffff'ffff'ffff */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* r1 = 0 or 0xffff'ffff'ffff */ + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), + /* computes unknown pointer, potentially OOB */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* potentially OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "R0 unbounded memory access", + .result = REJECT +}, +{ + "bounds check after 32-bit right shift with 64-bit input", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + /* r1 = 2 */ + BPF_MOV64_IMM(BPF_REG_1, 2), + /* r1 = 1<<32 */ + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 31), + /* r1 = 0 (NOT 2!) */ + BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 31), + /* r1 = 0xffff'fffe (NOT 0!) */ + BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 2), + /* computes OOB pointer */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "R0 invalid mem access", + .result = REJECT, +}, +{ + "bounds check map access with off+size signed 32bit overflow. test1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "map_value pointer and 2147483646", + .result = REJECT +}, +{ + "bounds check map access with off+size signed 32bit overflow. test2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "pointer offset 1073741822", + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .result = REJECT +}, +{ + "bounds check map access with off+size signed 32bit overflow. test3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "pointer offset -1073741822", + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .result = REJECT +}, +{ + "bounds check map access with off+size signed 32bit overflow. test4", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 1000000), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "map_value pointer and 1000000000000", + .result = REJECT +}, diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c new file mode 100644 index 000000000000..1fd07a4f27ac --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/bounds_deduction.c @@ -0,0 +1,124 @@ +{ + "check deducing bounds from const, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", +}, +{ + "check deducing bounds from const, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 1, +}, +{ + "check deducing bounds from const, 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", +}, +{ + "check deducing bounds from const, 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "check deducing bounds from const, 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", +}, +{ + "check deducing bounds from const, 6", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", +}, +{ + "check deducing bounds from const, 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, ~0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "check deducing bounds from const, 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, ~0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "check deducing bounds from const, 9", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 tried to subtract pointer from scalar", +}, +{ + "check deducing bounds from const, 10", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0), + /* Marks reg as unknown. */ + BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "math between ctx pointer and register with unbounded min value is not allowed", +}, diff --git a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c new file mode 100644 index 000000000000..9baca7a75c42 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c @@ -0,0 +1,406 @@ +{ + "bounds checks mixing signed and unsigned, positive bounds", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, 2), + BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 4, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", + .result = REJECT, +}, +{ + "bounds checks mixing signed and unsigned", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", + .result = REJECT, +}, +{ + "bounds checks mixing signed and unsigned, variant 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_1), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), + BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", + .result = REJECT, +}, +{ + "bounds checks mixing signed and unsigned, variant 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), + BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", + .result = REJECT, +}, +{ + "bounds checks mixing signed and unsigned, variant 4", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds checks mixing signed and unsigned, variant 5", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", + .result = REJECT, +}, +{ + "bounds checks mixing signed and unsigned, variant 6", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_6, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_4, 1, 4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_ST_MEM(BPF_H, BPF_REG_10, -512, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R4 min value is negative, either use unsigned", + .result = REJECT, +}, +{ + "bounds checks mixing signed and unsigned, variant 7", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds checks mixing signed and unsigned, variant 8", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", + .result = REJECT, +}, +{ + "bounds checks mixing signed and unsigned, variant 9", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds checks mixing signed and unsigned, variant 10", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", + .result = REJECT, +}, +{ + "bounds checks mixing signed and unsigned, variant 11", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), + /* Dead branch. */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", + .result = REJECT, +}, +{ + "bounds checks mixing signed and unsigned, variant 12", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -6), + BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", + .result = REJECT, +}, +{ + "bounds checks mixing signed and unsigned, variant 13", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, 2), + BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_1), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 4, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_7), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R7 has unknown scalar with mixed signed bounds", + .result = REJECT, +}, +{ + "bounds checks mixing signed and unsigned, variant 14", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_MOV64_IMM(BPF_REG_8, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 42, 6), + BPF_JMP_REG(BPF_JSGT, BPF_REG_8, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3), + BPF_JMP_IMM(BPF_JA, 0, 0, -7), + }, + .fixup_map_hash_8b = { 4 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", + .result = REJECT, +}, +{ + "bounds checks mixing signed and unsigned, variant 15", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_MOV64_IMM(BPF_REG_2, -6), + BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 1, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", + .result = REJECT, + .result_unpriv = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c new file mode 100644 index 000000000000..f24d50f09dbe --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c @@ -0,0 +1,44 @@ +{ + "bpf_get_stack return R0 within range", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)), + BPF_MOV64_IMM(BPF_REG_4, 256), + BPF_EMIT_CALL(BPF_FUNC_get_stack), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32), + BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16), + BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_9), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_EMIT_CALL(BPF_FUNC_get_stack), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c new file mode 100644 index 000000000000..4004891afa9c --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -0,0 +1,1942 @@ +{ + "calls: basic sanity", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, +}, +{ + "calls: not on unpriviledged", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 1, +}, +{ + "calls: div by 0 in subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(BPF_REG_2, 0), + BPF_MOV32_IMM(BPF_REG_3, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, +}, +{ + "calls: multiple ret types in subprog 1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", +}, +{ + "calls: multiple ret types in subprog 2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct __sk_buff, data)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_hash_8b = { 16 }, + .result = REJECT, + .errstr = "R0 min value is outside of the array range", +}, +{ + "calls: overlapping caller/callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn is not an exit or jmp", + .result = REJECT, +}, +{ + "calls: wrong recursive calls", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, +}, +{ + "calls: wrong src reg", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, +}, +{ + "calls: wrong off value", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, +}, +{ + "calls: jump back loop", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn 0 to 0", + .result = REJECT, +}, +{ + "calls: conditional call", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, +}, +{ + "calls: conditional call 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, +}, +{ + "calls: conditional call 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, +}, +{ + "calls: conditional call 4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -5), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, +}, +{ + "calls: conditional call 5", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, +}, +{ + "calls: conditional call 6", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, +}, +{ + "calls: using r0 returned by callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, +}, +{ + "calls: using uninit r0 from callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "!read_ok", + .result = REJECT, +}, +{ + "calls: callee is using r1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_ACT, + .result = ACCEPT, + .retval = TEST_DATA_LEN, +}, +{ + "calls: callee using args1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, +}, +{ + "calls: callee using wrong args2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "R2 !read_ok", + .result = REJECT, +}, +{ + "calls: callee using two args", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN, +}, +{ + "calls: callee changing pkt pointers", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* clear_all_pkt_pointers() has to walk all frames + * to make sure that pkt pointers in the caller + * are cleared when callee is calling a helper that + * adjusts packet size + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_xdp_adjust_head), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R6 invalid mem access 'inv'", + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "calls: two calls with args", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = TEST_DATA_LEN + TEST_DATA_LEN, +}, +{ + "calls: calls with stack arith", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 42, +}, +{ + "calls: calls with misaligned stack access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, + .errstr = "misaligned stack access", + .result = REJECT, +}, +{ + "calls: calls control flow, jump test", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 43, +}, +{ + "calls: calls control flow, jump test 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "jump out of range from insn 1 to 4", + .result = REJECT, +}, +{ + "calls: two calls with bad jump", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range from insn 11 to 9", + .result = REJECT, +}, +{ + "calls: recursive call. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, +}, +{ + "calls: recursive call. test2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, +}, +{ + "calls: unreachable code", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "unreachable insn 6", + .result = REJECT, +}, +{ + "calls: invalid call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, +}, +{ + "calls: invalid call 2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, +}, +{ + "calls: jumping across function bodies. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, +}, +{ + "calls: jumping across function bodies. test2", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, +}, +{ + "calls: call without exit", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, +}, +{ + "calls: call into middle of ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, +}, +{ + "calls: call into middle of other call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, +}, +{ + "calls: ld_abs with changing ctx data in callee", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_vlan_push), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed", + .result = REJECT, +}, +{ + "calls: two calls with bad fallthrough", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, +}, +{ + "calls: two calls with stack read", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, +}, +{ + "calls: two calls with stack write", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_8), + /* write into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* read from stack frame of main prog */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, +}, +{ + "calls: stack overflow using two frames (pre-call access)", + .insns = { + /* prog 1 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, +}, +{ + "calls: stack overflow using two frames (post-call access)", + .insns = { + /* prog 1 */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, +}, +{ + "calls: stack depth check using three frames. test1", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=256, stack_B=64 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, +}, +{ + "calls: stack depth check using three frames. test2", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=64, stack_B=256 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, +}, +{ + "calls: stack depth check using three frames. test3", + .insns = { + /* main */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 8), /* call B */ + BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 10, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_B, BPF_REG_10, -224, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + /* B */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -6), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=64, stack_A=224, stack_B=256 + * and max(main+A, main+A+B) > 512 + */ + .errstr = "combined stack", + .result = REJECT, +}, +{ + "calls: stack depth check using three frames. test4", + /* void main(void) { + * func1(0); + * func1(1); + * func2(1); + * } + * void func1(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } else { + * func2(alloc_or_recurse); + * } + * } + * void func2(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } + * } + */ + .insns = { + /* main */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 7), /* call B */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = REJECT, + .errstr = "combined stack", +}, +{ + "calls: stack depth check using three frames. test5", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */ + BPF_EXIT_INSN(), + /* A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */ + BPF_EXIT_INSN(), + /* C */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */ + BPF_EXIT_INSN(), + /* D */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */ + BPF_EXIT_INSN(), + /* E */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */ + BPF_EXIT_INSN(), + /* F */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */ + BPF_EXIT_INSN(), + /* G */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ + BPF_EXIT_INSN(), + /* H */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "call stack", + .result = REJECT, +}, +{ + "calls: spill into caller stack frame", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot spill", + .result = REJECT, +}, +{ + "calls: write into caller stack frame", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + .retval = 42, +}, +{ + "calls: write into callee stack frame", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot return stack pointer", + .result = REJECT, +}, +{ + "calls: two calls with stack write and void return", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* write into stack frame of main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), /* void return */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, +}, +{ + "calls: ambiguous return value", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .errstr = "R0 !read_ok", + .result = REJECT, +}, +{ + "calls: two calls that return map_value", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map_hash_8b = { 23 }, + .result = ACCEPT, +}, +{ + "calls: two calls that return map_value with bool condition", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map_hash_8b = { 23 }, + .result = ACCEPT, +}, +{ + "calls: two calls that return map_value with incorrect bool check", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map_hash_8b = { 23 }, + .result = REJECT, + .errstr = "invalid read from stack off -16+0 size 8", +}, +{ + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_hash_8b = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_hash_8b = { 12, 22 }, + .result = ACCEPT, +}, +{ + "calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), // 26 + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34 + BPF_JMP_IMM(BPF_JA, 0, 0, -30), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -8), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_hash_8b = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "calls: two calls that receive map_value_ptr_or_null via arg. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_hash_8b = { 12, 22 }, + .result = ACCEPT, +}, +{ + "calls: two calls that receive map_value_ptr_or_null via arg. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 0 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_hash_8b = { 12, 22 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", +}, +{ + "calls: pkt_ptr spill into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = POINTER_VALUE, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "calls: pkt_ptr spill into caller stack 2", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* Marking is still kept, but not in all cases safe. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "calls: pkt_ptr spill into caller stack 3", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Marking is still kept and safe here. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "calls: pkt_ptr spill into caller stack 4", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Check marking propagated. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "calls: pkt_ptr spill into caller stack 5", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "same insn cannot be used with different", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "calls: pkt_ptr spill into caller stack 6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "calls: pkt_ptr spill into caller stack 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "calls: pkt_ptr spill into caller stack 8", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "calls: pkt_ptr spill into caller stack 9", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "calls: caller stack init to zero or map_value_or_null", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + /* fetch map_value_or_null or const_zero from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* store into map_value */ + BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* if (ctx == 0) return; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8), + /* else bpf_map_lookup() and *(fp - 8) = r0 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 13 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "calls: stack init to zero and pruning", + .insns = { + /* first make allocated_stack 16 byte */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + /* now fork the execution such that the false branch + * of JGT insn will be verified second and it skisp zero + * init of fp-8 stack slot. If stack liveness marking + * is missing live_read marks from call map_lookup + * processing then pruning will incorrectly assume + * that fp-8 stack slot was unused in the fall-through + * branch and will accept the program incorrectly + */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 6 }, + .errstr = "invalid indirect read from stack off -8+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "calls: ctx read at start of subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "calls: cross frame pruning", + .insns = { + /* r8 = !!random(); + * call pruner() + * if (r8) + * do something bad; + */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr = "!read_ok", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/cfg.c b/tools/testing/selftests/bpf/verifier/cfg.c new file mode 100644 index 000000000000..349c0862fb4c --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/cfg.c @@ -0,0 +1,70 @@ +{ + "unreachable", + .insns = { + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .errstr = "unreachable", + .result = REJECT, +}, +{ + "unreachable2", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unreachable", + .result = REJECT, +}, +{ + "out of range jump", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "jump out of range", + .result = REJECT, +}, +{ + "out of range jump2", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, -2), + BPF_EXIT_INSN(), + }, + .errstr = "jump out of range", + .result = REJECT, +}, +{ + "loop (back-edge)", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, -1), + BPF_EXIT_INSN(), + }, + .errstr = "back-edge", + .result = REJECT, +}, +{ + "loop2 (back-edge)", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_JMP_IMM(BPF_JA, 0, 0, -4), + BPF_EXIT_INSN(), + }, + .errstr = "back-edge", + .result = REJECT, +}, +{ + "conditional loop", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), + BPF_EXIT_INSN(), + }, + .errstr = "back-edge", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c b/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c new file mode 100644 index 000000000000..6d65fe3e7321 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c @@ -0,0 +1,72 @@ +{ + "bpf_exit with invalid return code. test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x0; 0xffffffff)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, +}, +{ + "bpf_exit with invalid return code. test2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, +}, +{ + "bpf_exit with invalid return code. test3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x0; 0x3)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, +}, +{ + "bpf_exit with invalid return code. test4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, +}, +{ + "bpf_exit with invalid return code. test5", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x2; 0x0)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, +}, +{ + "bpf_exit with invalid return code. test6", + .insns = { + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R0 is not a known value (ctx)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, +}, +{ + "bpf_exit with invalid return code. test7", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4), + BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has unknown scalar value", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, +}, diff --git a/tools/testing/selftests/bpf/verifier/cgroup_skb.c b/tools/testing/selftests/bpf/verifier/cgroup_skb.c new file mode 100644 index 000000000000..52e4c03b076b --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/cgroup_skb.c @@ -0,0 +1,197 @@ +{ + "direct packet read test#1 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, pkt_type)), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, queue_mapping)), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, protocol)), + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, vlan_present)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid bpf_context access off=76 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "direct packet read test#2 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, vlan_tci)), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, vlan_proto)), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, priority)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, priority)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, ingress_ifindex)), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, hash)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "direct packet read test#3 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, cb[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5, + offsetof(struct __sk_buff, cb[1])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, cb[2])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, + offsetof(struct __sk_buff, cb[3])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8, + offsetof(struct __sk_buff, cb[4])), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "direct packet read test#4 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, family)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip4)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, local_ip4)), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, remote_port)), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, local_port)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid access of tc_classid for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid access of data_meta for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data_meta)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid access of flow_keys for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, flow_keys)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid write access to napi_id for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9, + offsetof(struct __sk_buff, napi_id)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "write tstamp from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid bpf_context access off=152 size=8", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "read tstamp from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, diff --git a/tools/testing/selftests/bpf/verifier/cgroup_storage.c b/tools/testing/selftests/bpf/verifier/cgroup_storage.c new file mode 100644 index 000000000000..97057c0a1b8a --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/cgroup_storage.c @@ -0,0 +1,220 @@ +{ + "valid cgroup storage access", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid cgroup storage access 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1 }, + .result = REJECT, + .errstr = "cannot pass map_type 1 into func bpf_get_local_storage", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid cgroup storage access 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "fd 1 is not pointing to valid bpf_map", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid cgroup storage access 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=64 off=256 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid cgroup storage access 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=64 off=-2 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "invalid cgroup storage access 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 7), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "get_local_storage() doesn't support non-zero flags", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid cgroup storage access 6", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "get_local_storage() doesn't support non-zero flags", + .errstr_unpriv = "R2 leaks addr into helper function", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "valid per-cpu cgroup storage access", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_percpu_cgroup_storage = { 1 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid per-cpu cgroup storage access 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1 }, + .result = REJECT, + .errstr = "cannot pass map_type 1 into func bpf_get_local_storage", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid per-cpu cgroup storage access 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "fd 1 is not pointing to valid bpf_map", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid per-cpu cgroup storage access 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_percpu_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=64 off=256 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid per-cpu cgroup storage access 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=64 off=-2 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "invalid per-cpu cgroup storage access 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 7), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_percpu_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "get_local_storage() doesn't support non-zero flags", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "invalid per-cpu cgroup storage access 6", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_percpu_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "get_local_storage() doesn't support non-zero flags", + .errstr_unpriv = "R2 leaks addr into helper function", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, diff --git a/tools/testing/selftests/bpf/verifier/const_or.c b/tools/testing/selftests/bpf/verifier/const_or.c new file mode 100644 index 000000000000..84446dfc7c1d --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/const_or.c @@ -0,0 +1,60 @@ +{ + "constant register |= constant should keep constant type", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48), + BPF_MOV64_IMM(BPF_REG_2, 34), + BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "constant register |= constant should not bypass stack boundary checks", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48), + BPF_MOV64_IMM(BPF_REG_2, 34), + BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack type R1 off=-48 access_size=58", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "constant register |= constant register should keep constant type", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48), + BPF_MOV64_IMM(BPF_REG_2, 34), + BPF_MOV64_IMM(BPF_REG_4, 13), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "constant register |= constant register should not bypass stack boundary checks", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48), + BPF_MOV64_IMM(BPF_REG_2, 34), + BPF_MOV64_IMM(BPF_REG_4, 24), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack type R1 off=-48 access_size=58", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c new file mode 100644 index 000000000000..92762c08f5e3 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/ctx.c @@ -0,0 +1,93 @@ +{ + "context stores via ST", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_ST stores into R1 ctx is not allowed", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "context stores via XADD", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1, + BPF_REG_0, offsetof(struct __sk_buff, mark), 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_XADD stores into R1 ctx is not allowed", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "arithmetic ops make PTR_TO_CTX unusable", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, + offsetof(struct __sk_buff, data) - + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .errstr = "dereference of modified ctx ptr", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "pass unmodified ctx pointer to helper", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_update), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "pass modified ctx pointer to helper, 1", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_update), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", +}, +{ + "pass modified ctx pointer to helper, 2", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_socket_cookie), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result_unpriv = REJECT, + .result = REJECT, + .errstr_unpriv = "dereference of modified ctx ptr", + .errstr = "dereference of modified ctx ptr", +}, +{ + "pass modified ctx pointer to helper, 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_update), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "variable ctx access var_off=(0x0; 0x4)", +}, diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c b/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c new file mode 100644 index 000000000000..b0195770da6a --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c @@ -0,0 +1,180 @@ +{ + "valid access family in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, family)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, +}, +{ + "valid access remote_ip4 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip4)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, +}, +{ + "valid access local_ip4 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip4)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, +}, +{ + "valid access remote_port in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_port)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, +}, +{ + "valid access local_port in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_port)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, +}, +{ + "valid access remote_ip6 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "valid access local_ip6 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "valid access size in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, size)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, +}, +{ + "invalid 64B read of size in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, size)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_MSG, +}, +{ + "invalid read past end of SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, size) + 4), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_MSG, +}, +{ + "invalid read offset in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, family) + 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "direct packet read for SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, data)), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, + offsetof(struct sk_msg_md, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, +}, +{ + "direct packet write for SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, data)), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, + offsetof(struct sk_msg_md, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, +}, +{ + "overlapping checks for direct packet access SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, data)), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, + offsetof(struct sk_msg_md, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, +}, diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c new file mode 100644 index 000000000000..881f1c7f57a1 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -0,0 +1,1033 @@ +{ + "access skb fields ok", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, pkt_type)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, queue_mapping)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, protocol)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, vlan_present)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, vlan_tci)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "access skb fields bad1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -4), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "access skb fields bad2", + .insns = { + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, pkt_type)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 4 }, + .errstr = "different pointers", + .errstr_unpriv = "R1 pointer comparison", + .result = REJECT, +}, +{ + "access skb fields bad3", + .insns = { + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, pkt_type)), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JA, 0, 0, -12), + }, + .fixup_map_hash_8b = { 6 }, + .errstr = "different pointers", + .errstr_unpriv = "R1 pointer comparison", + .result = REJECT, +}, +{ + "access skb fields bad4", + .insns = { + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JA, 0, 0, -13), + }, + .fixup_map_hash_8b = { 7 }, + .errstr = "different pointers", + .errstr_unpriv = "R1 pointer comparison", + .result = REJECT, +}, +{ + "invalid access __sk_buff family", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, family)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "invalid access __sk_buff remote_ip4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip4)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "invalid access __sk_buff local_ip4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip4)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "invalid access __sk_buff remote_ip6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "invalid access __sk_buff local_ip6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "invalid access __sk_buff remote_port", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_port)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "invalid access __sk_buff remote_port", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_port)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "valid access __sk_buff family", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, family)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "valid access __sk_buff remote_ip4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip4)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "valid access __sk_buff local_ip4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip4)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "valid access __sk_buff remote_ip6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "valid access __sk_buff local_ip6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "valid access __sk_buff remote_port", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_port)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "valid access __sk_buff remote_port", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_port)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "invalid access of tc_classid for SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .errstr = "invalid bpf_context access", +}, +{ + "invalid access of skb->mark for SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .errstr = "invalid bpf_context access", +}, +{ + "check skb->mark is not writeable by SK_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .errstr = "invalid bpf_context access", +}, +{ + "check skb->tc_index is writeable by SK_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tc_index)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "check skb->priority is writeable by SK_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, priority)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "direct packet read for SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "direct packet write for SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "overlapping checks for direct packet access SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, +}, +{ + "check skb->mark is not writeable by sockets", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .errstr_unpriv = "R1 leaks addr", + .result = REJECT, +}, +{ + "check skb->tc_index is not writeable by sockets", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .errstr_unpriv = "R1 leaks addr", + .result = REJECT, +}, +{ + "check cb access: byte", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) + 1), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) + 2), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) + 3), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1])), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1]) + 1), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1]) + 2), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1]) + 3), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2])), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2]) + 1), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2]) + 2), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2]) + 3), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3]) + 1), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3]) + 2), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3]) + 3), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4])), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 1), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 2), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0]) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0]) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0]) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1]) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1]) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1]) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2]) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2]) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2]) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3]) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3]) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3]) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4]) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4]) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4]) + 3), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "__sk_buff->hash, offset 0, byte store not permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, hash)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "__sk_buff->tc_index, offset 3, byte store not permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tc_index) + 3), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "check skb->hash byte load permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash)), +#else + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#endif + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "check skb->hash byte load permitted 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "check skb->hash byte load permitted 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "check skb->hash byte load permitted 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#else + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash)), +#endif + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "check cb access: byte, wrong type", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, +}, +{ + "check cb access: half", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) + 2), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1])), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1]) + 2), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2])), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2]) + 2), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3]) + 2), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4])), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0]) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1])), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1]) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2]) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3]) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4]) + 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "check cb access: half, unaligned", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) + 1), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned context access", + .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, +}, +{ + "check __sk_buff->hash, offset 0, half store not permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, hash)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "check __sk_buff->tc_index, offset 2, half store not permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tc_index) + 2), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "check skb->hash half load permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash)), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 2), +#endif + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "check skb->hash half load permitted 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 2), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash)), +#endif + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "check skb->hash half load not permitted, unaligned 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 1), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#endif + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "check skb->hash half load not permitted, unaligned 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 1), +#endif + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "check cb access: half, wrong type", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, +}, +{ + "check cb access: word", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "check cb access: word, unaligned 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0]) + 2), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned context access", + .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, +}, +{ + "check cb access: word, unaligned 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 1), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned context access", + .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, +}, +{ + "check cb access: word, unaligned 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 2), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned context access", + .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, +}, +{ + "check cb access: word, unaligned 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4]) + 3), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned context access", + .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, +}, +{ + "check cb access: double", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[2])), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "check cb access: double, unaligned 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[1])), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned context access", + .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, +}, +{ + "check cb access: double, unaligned 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_EXIT_INSN(), + }, + .errstr = "misaligned context access", + .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, +}, +{ + "check cb access: double, oob 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[4])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "check cb access: double, oob 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "check __sk_buff->ifindex dw store not permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, ifindex)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "check __sk_buff->ifindex dw load not permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, ifindex)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "check cb access: double, wrong type", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, +}, +{ + "check out of range skb->cb access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0]) + 256), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .errstr_unpriv = "", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_ACT, +}, +{ + "write skb fields from socket prog", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .errstr_unpriv = "R1 leaks addr", + .result_unpriv = REJECT, +}, +{ + "write skb fields from tc_cls_act prog", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tc_index)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "", + .result_unpriv = REJECT, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "check skb->data half load not permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data) + 2), +#endif + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", +}, +{ + "read gso_segs from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, gso_segs)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "write gso_segs from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, gso_segs)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .result_unpriv = REJECT, + .errstr = "invalid bpf_context access off=164 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, +}, +{ + "read gso_segs from CLS", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, gso_segs)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "check wire_len is not readable by sockets", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "check wire_len is readable by tc classifier", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "check wire_len is not writable by tc classifier", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid bpf_context access", + .errstr_unpriv = "R1 leaks addr", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c new file mode 100644 index 000000000000..50a8a63be4ac --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/dead_code.c @@ -0,0 +1,159 @@ +{ + "dead code: start", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 7), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 7, +}, +{ + "dead code: mid 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 7), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 7, +}, +{ + "dead code: mid 2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 4), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 7), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 1, +}, +{ + "dead code: end 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 7), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 7, +}, +{ + "dead code: end 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 7), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 12), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 7, +}, +{ + "dead code: end 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 7), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 8, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 12), + BPF_JMP_IMM(BPF_JA, 0, 0, -5), + }, + .result = ACCEPT, + .retval = 7, +}, +{ + "dead code: tail of main + func", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 7), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 8, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 12), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 7, +}, +{ + "dead code: tail of main + two functions", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 7), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 8, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 12), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 7, +}, +{ + "dead code: function in the middle and mid of another func", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 12), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 7), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 7, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 7, +}, +{ + "dead code: middle of main before call", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 2, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 2, +}, +{ + "dead code: start of a function", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 2, +}, diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c new file mode 100644 index 000000000000..e3fc22e672c2 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c @@ -0,0 +1,633 @@ +{ + "pkt_end - pkt_start is allowed", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = TEST_DATA_LEN, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 15), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 7), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_3, 12), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 14), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_3, 4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access off=76", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, +}, +{ + "direct packet access: test4 (write)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test5 (pkt_end >= reg, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test6 (pkt_end >= reg, bad access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test7 (pkt_end >= reg, both accesses)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test8 (double test, variant 1)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test9 (double test, variant 2)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test10 (write invalid)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test11 (shift, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_3, 144), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 3), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, +}, +{ + "direct packet access: test12 (and, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_3, 144), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23), + BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, +}, +{ + "direct packet access: test13 (branches, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 13), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 2), + BPF_MOV64_IMM(BPF_REG_3, 14), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 24), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23), + BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, +}, +{ + "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7), + BPF_MOV64_IMM(BPF_REG_5, 12), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 4), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, +}, +{ + "direct packet access: test15 (spill with xadd)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_5, 4096), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R2 invalid mem access 'inv'", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "direct packet access: test16 (arith on data_end)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 16), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R3 pointer arithmetic on pkt_end", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test17 (pruning, alignment)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14), + BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_JMP_A(-6), + }, + .errstr = "misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, +}, +{ + "direct packet access: test18 (imm += pkt_ptr, 1)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_IMM(BPF_REG_0, 8), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test19 (imm += pkt_ptr, 2)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + BPF_MOV64_IMM(BPF_REG_4, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), + BPF_STX_MEM(BPF_B, BPF_REG_4, BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test20 (x += pkt_ptr, 1)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0x7fff), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "direct packet access: test21 (x += pkt_ptr, 2)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9), + BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0x7fff), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "direct packet access: test22 (x += pkt_ptr, 3)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_3, -16), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), + BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_STX_MEM(BPF_H, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "direct packet access: test23 (x += pkt_ptr, 4)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "direct packet access: test24 (x += pkt_ptr, 5)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 64), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7fff - 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "direct packet access: test25 (marking on <, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -4), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test26 (marking on <, bad access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "direct packet access: test27 (marking on <=, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, +}, +{ + "direct packet access: test28 (marking on <=, bad access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -4), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, diff --git a/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c b/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c new file mode 100644 index 000000000000..698e3779fdd2 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c @@ -0,0 +1,40 @@ +{ + "direct stack access with 32-bit wraparound. test1", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "fp pointer and 2147483647", + .result = REJECT +}, +{ + "direct stack access with 32-bit wraparound. test2", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "fp pointer and 1073741823", + .result = REJECT +}, +{ + "direct stack access with 32-bit wraparound. test3", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "fp pointer offset 1073741822", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .result = REJECT +}, diff --git a/tools/testing/selftests/bpf/verifier/div0.c b/tools/testing/selftests/bpf/verifier/div0.c new file mode 100644 index 000000000000..7685edfbcf71 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/div0.c @@ -0,0 +1,184 @@ +{ + "DIV32 by 0, zero check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, +}, +{ + "DIV32 by 0, zero check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, +}, +{ + "DIV64 by 0, zero check", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, +}, +{ + "MOD32 by 0, zero check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, +}, +{ + "MOD32 by 0, zero check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, +}, +{ + "MOD64 by 0, zero check", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, +}, +{ + "DIV32 by 0, zero check ok, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_MOV32_IMM(BPF_REG_2, 16), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 8, +}, +{ + "DIV32 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, +}, +{ + "DIV32 by 0, zero check 2, cls", + .insns = { + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, +}, +{ + "DIV64 by 0, zero check, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, +}, +{ + "MOD32 by 0, zero check ok, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 3), + BPF_MOV32_IMM(BPF_REG_2, 5), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, +}, +{ + "MOD32 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, +}, +{ + "MOD32 by 0, zero check 2, cls", + .insns = { + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, +}, +{ + "MOD64 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, +}, +{ + "MOD64 by 0, zero check 2, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, -1), + BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = -1, +}, diff --git a/tools/testing/selftests/bpf/verifier/div_overflow.c b/tools/testing/selftests/bpf/verifier/div_overflow.c new file mode 100644 index 000000000000..bd3f38dbe796 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/div_overflow.c @@ -0,0 +1,104 @@ +/* Just make sure that JITs used udiv/umod as otherwise we get + * an exception from INT_MIN/-1 overflow similarly as with div + * by zero. + */ +{ + "DIV32 overflow, check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, +}, +{ + "DIV32 overflow, check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, +}, +{ + "DIV64 overflow, check 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, +}, +{ + "DIV64 overflow, check 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, +}, +{ + "MOD32 overflow, check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = INT_MIN, +}, +{ + "MOD32 overflow, check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = INT_MIN, +}, +{ + "MOD64 overflow, check 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, +}, +{ + "MOD64 overflow, check 2", + .insns = { + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, +}, diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c new file mode 100644 index 000000000000..1f39d845c64f --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c @@ -0,0 +1,614 @@ +{ + "helper access to variable memory: stack, bitwise AND + JMP, correct bounds", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: stack, bitwise AND, zero included", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .errstr = "invalid indirect read from stack off -64+0 size 64", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: stack, bitwise AND + JMP, wrong max", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack type R1 off=-64 access_size=65", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: stack, JMP, correct bounds", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: stack, JMP (signed), correct bounds", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: stack, JMP, bounds + offset", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack type R1 off=-64 access_size=65", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: stack, JMP, wrong max", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid stack type R1 off=-64 access_size=65", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: stack, JMP, no max check", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + /* because max wasn't checked, signed min is negative */ + .errstr = "R2 min value is negative, either use unsigned or 'var &= const'", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: stack, JMP, no min check", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid indirect read from stack off -64+0 size 64", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: stack, JMP (signed), no min check", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 16), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R2 min value is negative", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: map, JMP, correct bounds", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val), 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: map, JMP, wrong max", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) + 1, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=0 size=49", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: map adjusted, JMP, correct bounds", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20), + BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 20, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: map adjusted, JMP, wrong max", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20), + BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 19, 4), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R1 min value is outside of the array range", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .errstr = "R1 type=inv expected=fp", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 0 /* csum_diff of 64-byte packet */, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .errstr = "R1 type=inv expected=fp", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .errstr = "R1 type=inv expected=fp", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: 8 bytes leak", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_EXIT_INSN(), + }, + .errstr = "invalid indirect read from stack off -64+32 size 64", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to variable memory: 8 bytes no leak (init memory)", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, diff --git a/tools/testing/selftests/bpf/verifier/helper_packet_access.c b/tools/testing/selftests/bpf/verifier/helper_packet_access.c new file mode 100644 index 000000000000..ae54587e9829 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/helper_packet_access.c @@ -0,0 +1,460 @@ +{ + "helper access to packet: test1, valid packet_ptr range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 5 }, + .result_unpriv = ACCEPT, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "helper access to packet: test2, unchecked packet_ptr", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1 }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "helper access to packet: test3, variable add", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10), + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 11 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "helper access to packet: test4, packet_ptr with bad range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 7 }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "helper access to packet: test5, packet_ptr with too short range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 6 }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "helper access to packet: test6, cls valid packet_ptr range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 5 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test7, cls unchecked packet_ptr", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1 }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test8, cls variable add", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10), + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 11 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test9, cls packet_ptr with bad range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 7 }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test10, cls packet_ptr with too short range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 6 }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test11, cls unsuitable helper 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_4, 42), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_store_bytes), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "helper access to the packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test12, cls unsuitable helper 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_4, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "helper access to the packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test13, cls helper ok", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test14, cls helper ok sub", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test15, cls helper fail sub", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test16, cls helper fail range 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test17, cls helper fail range 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, -9), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R2 min value is negative", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test18, cls helper fail range 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, ~0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R2 min value is negative", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test19, cls helper range zero", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test20, pkt end as input", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R1 type=pkt_end expected=fp", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "helper access to packet: test21, wrong reg", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, diff --git a/tools/testing/selftests/bpf/verifier/helper_value_access.c b/tools/testing/selftests/bpf/verifier/helper_value_access.c new file mode 100644 index 000000000000..7572e403ddb9 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/helper_value_access.c @@ -0,0 +1,953 @@ +{ + "helper access to map: full range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: partial range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: empty range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_trace_printk), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=0 size=0", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: out-of-bound range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=0 size=56", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: negative range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R2 min value is negative", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via const imm): full range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_2, + sizeof(struct test_val) - offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via const imm): partial range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via const imm): empty range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_trace_printk), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=4 size=0", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via const imm): out-of-bound range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_2, + sizeof(struct test_val) - offsetof(struct test_val, foo) + 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=4 size=52", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via const imm): negative range (> adjustment)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R2 min value is negative", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via const imm): negative range (< adjustment)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R2 min value is negative", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via const reg): full range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, + sizeof(struct test_val) - offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via const reg): partial range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via const reg): empty range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_trace_printk), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R1 min value is outside of the array range", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via const reg): out-of-bound range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, + sizeof(struct test_val) - + offsetof(struct test_val, foo) + 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=4 size=52", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via const reg): negative range (> adjustment)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R2 min value is negative", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via const reg): negative range (< adjustment)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R2 min value is negative", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via variable): full range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, + sizeof(struct test_val) - offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via variable): partial range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via variable): empty range", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_trace_printk), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R1 min value is outside of the array range", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via variable): no max check", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R1 unbounded memory access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to adjusted map (via variable): wrong max check", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, + sizeof(struct test_val) - + offsetof(struct test_val, foo) + 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "invalid access to map value, value_size=48 off=4 size=45", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: bounds check using <, good access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: bounds check using <, bad access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = REJECT, + .errstr = "R1 unbounded memory access", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: bounds check using <=, good access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: bounds check using <=, bad access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = REJECT, + .errstr = "R1 unbounded memory access", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: bounds check using s<, good access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 0, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: bounds check using s<, good access 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: bounds check using s<, bad access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = REJECT, + .errstr = "R1 min value is negative", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: bounds check using s<=, good access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: bounds check using s<=, good access 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "helper access to map: bounds check using s<=, bad access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .result = REJECT, + .errstr = "R1 min value is negative", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "map lookup helper access to map", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 3, 8 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "map update helper access to map", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_update_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 3, 10 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "map update helper access to map: wrong size", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_update_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_16b = { 10 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=0 size=16", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "map helper access to adjusted map (via const imm)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, offsetof(struct other_val, bar)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 3, 9 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "map helper access to adjusted map (via const imm): out-of-bound 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, sizeof(struct other_val) - 4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 3, 9 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=12 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "map helper access to adjusted map (via const imm): out-of-bound 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 3, 9 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=-4 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "map helper access to adjusted map (via const reg)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, offsetof(struct other_val, bar)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 3, 10 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "map helper access to adjusted map (via const reg): out-of-bound 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, sizeof(struct other_val) - 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 3, 10 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=12 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "map helper access to adjusted map (via const reg): out-of-bound 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, -4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 3, 10 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=-4 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "map helper access to adjusted map (via variable)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct other_val, bar), 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 3, 11 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "map helper access to adjusted map (via variable): no max check", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 3, 10 }, + .result = REJECT, + .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "map helper access to adjusted map (via variable): wrong max check", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct other_val, bar) + 1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 3, 11 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=16 off=9 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c new file mode 100644 index 000000000000..be488b4495a3 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/jit.c @@ -0,0 +1,88 @@ +{ + "jit: lsh, rsh, arsh by 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 0xff), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1), + BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ + "jit: mov32 for ldimm64, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32), + BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ + "jit: mov32 for ldimm64, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL), + BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ + "jit: various mul tests", + .insns = { + BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL), + BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), + BPF_LD_IMM64(BPF_REG_1, 0xefefefULL), + BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), + BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_REG(BPF_REG_2, BPF_REG_2), + BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), + BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), + BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL), + BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL), + BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL), + BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c new file mode 100644 index 000000000000..ceb39ffa0e88 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -0,0 +1,724 @@ +{ + "jset32: BPF_K", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + /* reg, high bits shouldn't be tested */ + BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, -2, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + + BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 1, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 0, + .data64 = { 1ULL << 63, } + }, + { .retval = 2, + .data64 = { 1, } + }, + { .retval = 2, + .data64 = { 1ULL << 63 | 1, } + }, + }, +}, +{ + "jset32: BPF_X", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_LD_IMM64(BPF_REG_8, 0x8000000000000000), + BPF_JMP32_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + + BPF_LD_IMM64(BPF_REG_8, 0x8000000000000001), + BPF_JMP32_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 0, + .data64 = { 1ULL << 63, } + }, + { .retval = 2, + .data64 = { 1, } + }, + { .retval = 2, + .data64 = { 1ULL << 63 | 1, } + }, + }, +}, +{ + "jset32: min/max deduction", + .insns = { + BPF_RAND_UEXT_R7, + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 0x10, 1), + BPF_EXIT_INSN(), + BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x10, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "jeq32: BPF_K", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, -1, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 2, + .retvals = { + { .retval = 0, + .data64 = { -2, } + }, + { .retval = 2, + .data64 = { -1, } + }, + }, +}, +{ + "jeq32: BPF_X", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_LD_IMM64(BPF_REG_8, 0x7000000000000001), + BPF_JMP32_REG(BPF_JEQ, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 0, + .data64 = { 2, } + }, + { .retval = 2, + .data64 = { 1, } + }, + { .retval = 2, + .data64 = { 1ULL << 63 | 1, } + }, + }, +}, +{ + "jeq32: min/max deduction", + .insns = { + BPF_RAND_UEXT_R7, + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, 0x10, 1), + BPF_EXIT_INSN(), + BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, 0xf, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "jne32: BPF_K", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, -1, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 2, + .retvals = { + { .retval = 2, + .data64 = { 1, } + }, + { .retval = 0, + .data64 = { -1, } + }, + }, +}, +{ + "jne32: BPF_X", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_LD_IMM64(BPF_REG_8, 0x8000000000000001), + BPF_JMP32_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 0, + .data64 = { 1, } + }, + { .retval = 2, + .data64 = { 2, } + }, + { .retval = 2, + .data64 = { 1ULL << 63 | 2, } + }, + }, +}, +{ + "jne32: min/max deduction", + .insns = { + BPF_RAND_UEXT_R7, + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, 0x10, 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x10, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "jge32: BPF_K", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, UINT_MAX - 1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 2, + .data64 = { UINT_MAX, } + }, + { .retval = 2, + .data64 = { UINT_MAX - 1, } + }, + { .retval = 0, + .data64 = { 0, } + }, + }, +}, +{ + "jge32: BPF_X", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LD_IMM64(BPF_REG_8, UINT_MAX | 1ULL << 32), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 2, + .data64 = { UINT_MAX, } + }, + { .retval = 0, + .data64 = { INT_MAX, } + }, + { .retval = 0, + .data64 = { (UINT_MAX - 1) | 2ULL << 32, } + }, + }, +}, +{ + "jge32: min/max deduction", + .insns = { + BPF_RAND_UEXT_R7, + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_LD_IMM64(BPF_REG_8, 0x7ffffff0 | 1ULL << 32), + BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x7ffffff0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ + "jgt32: BPF_K", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_IMM(BPF_JGT, BPF_REG_7, UINT_MAX - 1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 2, + .data64 = { UINT_MAX, } + }, + { .retval = 0, + .data64 = { UINT_MAX - 1, } + }, + { .retval = 0, + .data64 = { 0, } + }, + }, +}, +{ + "jgt32: BPF_X", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LD_IMM64(BPF_REG_8, (UINT_MAX - 1) | 1ULL << 32), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 2, + .data64 = { UINT_MAX, } + }, + { .retval = 0, + .data64 = { UINT_MAX - 1, } + }, + { .retval = 0, + .data64 = { (UINT_MAX - 1) | 2ULL << 32, } + }, + }, +}, +{ + "jgt32: min/max deduction", + .insns = { + BPF_RAND_UEXT_R7, + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_LD_IMM64(BPF_REG_8, 0x7ffffff0 | 1ULL << 32), + BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 0x7ffffff0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ + "jle32: BPF_K", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, INT_MAX, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 2, + .data64 = { INT_MAX - 1, } + }, + { .retval = 0, + .data64 = { UINT_MAX, } + }, + { .retval = 2, + .data64 = { INT_MAX, } + }, + }, +}, +{ + "jle32: BPF_X", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LD_IMM64(BPF_REG_8, (INT_MAX - 1) | 2ULL << 32), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 0, + .data64 = { INT_MAX | 1ULL << 32, } + }, + { .retval = 2, + .data64 = { INT_MAX - 2, } + }, + { .retval = 0, + .data64 = { UINT_MAX, } + }, + }, +}, +{ + "jle32: min/max deduction", + .insns = { + BPF_RAND_UEXT_R7, + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_LD_IMM64(BPF_REG_8, 0x7ffffff0 | 1ULL << 32), + BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 0x7ffffff0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ + "jlt32: BPF_K", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_IMM(BPF_JLT, BPF_REG_7, INT_MAX, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 0, + .data64 = { INT_MAX, } + }, + { .retval = 0, + .data64 = { UINT_MAX, } + }, + { .retval = 2, + .data64 = { INT_MAX - 1, } + }, + }, +}, +{ + "jlt32: BPF_X", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LD_IMM64(BPF_REG_8, INT_MAX | 2ULL << 32), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 0, + .data64 = { INT_MAX | 1ULL << 32, } + }, + { .retval = 0, + .data64 = { UINT_MAX, } + }, + { .retval = 2, + .data64 = { (INT_MAX - 1) | 3ULL << 32, } + }, + }, +}, +{ + "jlt32: min/max deduction", + .insns = { + BPF_RAND_UEXT_R7, + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_LD_IMM64(BPF_REG_8, 0x7ffffff0 | 1ULL << 32), + BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0x7ffffff0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ + "jsge32: BPF_K", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 2, + .data64 = { 0, } + }, + { .retval = 2, + .data64 = { -1, } + }, + { .retval = 0, + .data64 = { -2, } + }, + }, +}, +{ + "jsge32: BPF_X", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LD_IMM64(BPF_REG_8, (__u32)-1 | 2ULL << 32), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 2, + .data64 = { -1, } + }, + { .retval = 2, + .data64 = { 0x7fffffff | 1ULL << 32, } + }, + { .retval = 0, + .data64 = { -2, } + }, + }, +}, +{ + "jsge32: min/max deduction", + .insns = { + BPF_RAND_UEXT_R7, + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_LD_IMM64(BPF_REG_8, 0x7ffffff0 | 1ULL << 32), + BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0x7ffffff0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ + "jsgt32: BPF_K", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_IMM(BPF_JSGT, BPF_REG_7, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 0, + .data64 = { (__u32)-2, } + }, + { .retval = 0, + .data64 = { -1, } + }, + { .retval = 2, + .data64 = { 1, } + }, + }, +}, +{ + "jsgt32: BPF_X", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LD_IMM64(BPF_REG_8, 0x7ffffffe | 1ULL << 32), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 0, + .data64 = { 0x7ffffffe, } + }, + { .retval = 0, + .data64 = { 0x1ffffffffULL, } + }, + { .retval = 2, + .data64 = { 0x7fffffff, } + }, + }, +}, +{ + "jsgt32: min/max deduction", + .insns = { + BPF_RAND_SEXT_R7, + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_LD_IMM64(BPF_REG_8, (__u32)(-2) | 1ULL << 32), + BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, -2, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ + "jsle32: BPF_K", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_IMM(BPF_JSLE, BPF_REG_7, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 2, + .data64 = { (__u32)-2, } + }, + { .retval = 2, + .data64 = { -1, } + }, + { .retval = 0, + .data64 = { 1, } + }, + }, +}, +{ + "jsle32: BPF_X", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LD_IMM64(BPF_REG_8, 0x7ffffffe | 1ULL << 32), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 2, + .data64 = { 0x7ffffffe, } + }, + { .retval = 2, + .data64 = { (__u32)-1, } + }, + { .retval = 0, + .data64 = { 0x7fffffff | 2ULL << 32, } + }, + }, +}, +{ + "jsle32: min/max deduction", + .insns = { + BPF_RAND_UEXT_R7, + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_LD_IMM64(BPF_REG_8, 0x7ffffff0 | 1ULL << 32), + BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_7, 0x7ffffff0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ + "jslt32: BPF_K", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 2, + .data64 = { (__u32)-2, } + }, + { .retval = 0, + .data64 = { -1, } + }, + { .retval = 0, + .data64 = { 1, } + }, + }, +}, +{ + "jslt32: BPF_X", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LD_IMM64(BPF_REG_8, 0x7fffffff | 1ULL << 32), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 3, + .retvals = { + { .retval = 2, + .data64 = { 0x7ffffffe, } + }, + { .retval = 2, + .data64 = { 0xffffffff, } + }, + { .retval = 0, + .data64 = { 0x7fffffff | 2ULL << 32, } + }, + }, +}, +{ + "jslt32: min/max deduction", + .insns = { + BPF_RAND_SEXT_R7, + BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 2), + BPF_LD_IMM64(BPF_REG_8, (__u32)(-1) | 1ULL << 32), + BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c new file mode 100644 index 000000000000..7e14037acfaf --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/jset.c @@ -0,0 +1,165 @@ +{ + "jset: functional", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + + /* reg, bit 63 or bit 0 set, taken */ + BPF_LD_IMM64(BPF_REG_8, 0x8000000000000001), + BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + + /* reg, bit 62, not taken */ + BPF_LD_IMM64(BPF_REG_8, 0x4000000000000000), + BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + + /* imm, any bit set, taken */ + BPF_JMP_IMM(BPF_JSET, BPF_REG_7, -1, 1), + BPF_EXIT_INSN(), + + /* imm, bit 31 set, taken */ + BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x80000000, 1), + BPF_EXIT_INSN(), + + /* all good - return r0 == 2 */ + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 7, + .retvals = { + { .retval = 2, + .data64 = { (1ULL << 63) | (1U << 31) | (1U << 0), } + }, + { .retval = 2, + .data64 = { (1ULL << 63) | (1U << 31), } + }, + { .retval = 2, + .data64 = { (1ULL << 31) | (1U << 0), } + }, + { .retval = 2, + .data64 = { (__u32)-1, } + }, + { .retval = 2, + .data64 = { ~0x4000000000000000ULL, } + }, + { .retval = 0, + .data64 = { 0, } + }, + { .retval = 0, + .data64 = { ~0ULL, } + }, + }, +}, +{ + "jset: sign-extend", + .insns = { + BPF_DIRECT_PKT_R2, + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + + BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x80000000, 1), + BPF_EXIT_INSN(), + + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + .data = { 1, 0, 0, 0, 0, 0, 0, 1, }, +}, +{ + "jset: known const compare", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .retval_unpriv = 1, + .result_unpriv = ACCEPT, + .retval = 1, + .result = ACCEPT, +}, +{ + "jset: known const compare bad", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "!read_ok", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, +}, +{ + "jset: unknown const compare taken", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "!read_ok", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, +}, +{ + "jset: unknown const compare not taken", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "!read_ok", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, +}, +{ + "jset: half-known const compare", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .result_unpriv = ACCEPT, + .result = ACCEPT, +}, +{ + "jset: range", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff), + BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3), + BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .result_unpriv = ACCEPT, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/jump.c b/tools/testing/selftests/bpf/verifier/jump.c new file mode 100644 index 000000000000..8e6fcc8940f0 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/jump.c @@ -0,0 +1,180 @@ +{ + "jump test 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 pointer comparison", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "jump test 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 14), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 11), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 5), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 pointer comparison", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "jump test 3", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 19), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_JMP_IMM(BPF_JA, 0, 0, 15), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32), + BPF_JMP_IMM(BPF_JA, 0, 0, 11), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -40), + BPF_JMP_IMM(BPF_JA, 0, 0, 7), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -56), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 24 }, + .errstr_unpriv = "R1 pointer comparison", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = -ENOENT, +}, +{ + "jump test 4", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 pointer comparison", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "jump test 5", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 pointer comparison", + .result_unpriv = REJECT, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/junk_insn.c b/tools/testing/selftests/bpf/verifier/junk_insn.c new file mode 100644 index 000000000000..89d690f1992a --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/junk_insn.c @@ -0,0 +1,45 @@ +{ + "junk insn", + .insns = { + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unknown opcode 00", + .result = REJECT, +}, +{ + "junk insn2", + .insns = { + BPF_RAW_INSN(1, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_LDX uses reserved fields", + .result = REJECT, +}, +{ + "junk insn3", + .insns = { + BPF_RAW_INSN(-1, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "unknown opcode ff", + .result = REJECT, +}, +{ + "junk insn4", + .insns = { + BPF_RAW_INSN(-1, -1, -1, -1, -1), + BPF_EXIT_INSN(), + }, + .errstr = "unknown opcode ff", + .result = REJECT, +}, +{ + "junk insn5", + .insns = { + BPF_RAW_INSN(0x7f, -1, -1, -1, -1), + BPF_EXIT_INSN(), + }, + .errstr = "BPF_ALU uses reserved fields", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/ld_abs.c b/tools/testing/selftests/bpf/verifier/ld_abs.c new file mode 100644 index 000000000000..f6599d2ec22d --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/ld_abs.c @@ -0,0 +1,286 @@ +{ + "ld_abs: check calling conv, r1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, +}, +{ + "ld_abs: check calling conv, r2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, +}, +{ + "ld_abs: check calling conv, r3", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .errstr = "R3 !read_ok", + .result = REJECT, +}, +{ + "ld_abs: check calling conv, r4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .errstr = "R4 !read_ok", + .result = REJECT, +}, +{ + "ld_abs: check calling conv, r5", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .errstr = "R5 !read_ok", + .result = REJECT, +}, +{ + "ld_abs: check calling conv, r7", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_7, 0), + BPF_LD_ABS(BPF_W, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "ld_abs: tests on r6 and skb data reload helper", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_vlan_push), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 42 /* ultimate return value */, +}, +{ + "ld_abs: invalid op 1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_DW, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "unknown opcode", +}, +{ + "ld_abs: invalid op 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 256), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_IND(BPF_DW, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "unknown opcode", +}, +{ + "ld_abs: nmap reduced", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_H, 12), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 28), + BPF_LD_ABS(BPF_H, 12), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 26), + BPF_MOV32_IMM(BPF_REG_0, 18), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -64), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -64), + BPF_LD_IND(BPF_W, BPF_REG_7, 14), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -60), + BPF_MOV32_IMM(BPF_REG_0, 280971478), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -60), + BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 15), + BPF_LD_ABS(BPF_H, 12), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 13), + BPF_MOV32_IMM(BPF_REG_0, 22), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56), + BPF_LD_IND(BPF_H, BPF_REG_7, 14), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -52), + BPF_MOV32_IMM(BPF_REG_0, 17366), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -48), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -48), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -52), + BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV32_IMM(BPF_REG_0, 256), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .data = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6, + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 256, +}, +{ + "ld_abs: div + abs, test 1", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 3), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2), + BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0), + BPF_LD_ABS(BPF_B, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), + BPF_LD_IND(BPF_B, BPF_REG_8, -70), + BPF_EXIT_INSN(), + }, + .data = { + 10, 20, 30, 40, 50, + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 10, +}, +{ + "ld_abs: div + abs, test 2", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 3), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2), + BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0), + BPF_LD_ABS(BPF_B, 128), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), + BPF_LD_IND(BPF_B, BPF_REG_8, -70), + BPF_EXIT_INSN(), + }, + .data = { + 10, 20, 30, 40, 50, + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, +}, +{ + "ld_abs: div + abs, test 3", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0), + BPF_LD_ABS(BPF_B, 3), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .data = { + 10, 20, 30, 40, 50, + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, +}, +{ + "ld_abs: div + abs, test 4", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0), + BPF_LD_ABS(BPF_B, 256), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .data = { + 10, 20, 30, 40, 50, + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, +}, +{ + "ld_abs: vlan + abs, test 1", + .insns = { }, + .data = { + 0x34, + }, + .fill_helper = bpf_fill_ld_abs_vlan_push_pop, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0xbef, +}, +{ + "ld_abs: vlan + abs, test 2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_vlan_push), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .data = { + 0x34, + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 42, +}, +{ + "ld_abs: jump around ld_abs", + .insns = { }, + .data = { + 10, 11, + }, + .fill_helper = bpf_fill_jump_around_ld_abs, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 10, +}, diff --git a/tools/testing/selftests/bpf/verifier/ld_dw.c b/tools/testing/selftests/bpf/verifier/ld_dw.c new file mode 100644 index 000000000000..d2c75b889598 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/ld_dw.c @@ -0,0 +1,36 @@ +{ + "ld_dw: xor semi-random 64 bit imms, test 1", + .insns = { }, + .data = { }, + .fill_helper = bpf_fill_rand_ld_dw, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 4090, +}, +{ + "ld_dw: xor semi-random 64 bit imms, test 2", + .insns = { }, + .data = { }, + .fill_helper = bpf_fill_rand_ld_dw, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2047, +}, +{ + "ld_dw: xor semi-random 64 bit imms, test 3", + .insns = { }, + .data = { }, + .fill_helper = bpf_fill_rand_ld_dw, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 511, +}, +{ + "ld_dw: xor semi-random 64 bit imms, test 4", + .insns = { }, + .data = { }, + .fill_helper = bpf_fill_rand_ld_dw, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 5, +}, diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c new file mode 100644 index 000000000000..28b8c805a293 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c @@ -0,0 +1,141 @@ +{ + "test1 ld_imm64", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr = "invalid BPF_LD_IMM insn", + .errstr_unpriv = "R1 pointer comparison", + .result = REJECT, +}, +{ + "test2 ld_imm64", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid BPF_LD_IMM insn", + .errstr_unpriv = "R1 pointer comparison", + .result = REJECT, +}, +{ + "test3 ld_imm64", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, +}, +{ + "test4 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, +}, +{ + "test5 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, +}, +{ + "test6 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "test7 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), + BPF_RAW_INSN(0, 0, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 1, +}, +{ + "test8 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 1, 1), + BPF_RAW_INSN(0, 0, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "uses reserved fields", + .result = REJECT, +}, +{ + "test9 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), + BPF_RAW_INSN(0, 0, 0, 1, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, +}, +{ + "test10 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), + BPF_RAW_INSN(0, BPF_REG_1, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, +}, +{ + "test11 ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1), + BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, +}, +{ + "test12 ld_imm64", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1), + BPF_RAW_INSN(0, 0, 0, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "not pointing to valid bpf_map", + .result = REJECT, +}, +{ + "test13 ld_imm64", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1), + BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_ld_imm64 insn", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/ld_ind.c b/tools/testing/selftests/bpf/verifier/ld_ind.c new file mode 100644 index 000000000000..079734227538 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/ld_ind.c @@ -0,0 +1,72 @@ +{ + "ld_ind: check calling conv, r1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, +}, +{ + "ld_ind: check calling conv, r2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, +}, +{ + "ld_ind: check calling conv, r3", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .errstr = "R3 !read_ok", + .result = REJECT, +}, +{ + "ld_ind: check calling conv, r4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .errstr = "R4 !read_ok", + .result = REJECT, +}, +{ + "ld_ind: check calling conv, r5", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_5, 1), + BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .errstr = "R5 !read_ok", + .result = REJECT, +}, +{ + "ld_ind: check calling conv, r7", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 1, +}, diff --git a/tools/testing/selftests/bpf/verifier/leak_ptr.c b/tools/testing/selftests/bpf/verifier/leak_ptr.c new file mode 100644 index 000000000000..d6eec17f2cd2 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/leak_ptr.c @@ -0,0 +1,67 @@ +{ + "leak pointer into ctx 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 2 }, + .errstr_unpriv = "R2 leaks addr into mem", + .result_unpriv = REJECT, + .result = REJECT, + .errstr = "BPF_XADD stores into R1 ctx is not allowed", +}, +{ + "leak pointer into ctx 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R10 leaks addr into mem", + .result_unpriv = REJECT, + .result = REJECT, + .errstr = "BPF_XADD stores into R1 ctx is not allowed", +}, +{ + "leak pointer into ctx 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1 }, + .errstr_unpriv = "R2 leaks addr into ctx", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "leak pointer into map val", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 4 }, + .errstr_unpriv = "R6 leaks addr into mem", + .result_unpriv = REJECT, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/lwt.c b/tools/testing/selftests/bpf/verifier/lwt.c new file mode 100644 index 000000000000..2cab6a3966bb --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/lwt.c @@ -0,0 +1,189 @@ +{ + "invalid direct packet write for LWT_IN", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "cannot write into packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, +}, +{ + "invalid direct packet write for LWT_OUT", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "cannot write into packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_OUT, +}, +{ + "direct packet write for LWT_XMIT", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_LWT_XMIT, +}, +{ + "direct packet read for LWT_IN", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_LWT_IN, +}, +{ + "direct packet read for LWT_OUT", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_LWT_OUT, +}, +{ + "direct packet read for LWT_XMIT", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_LWT_XMIT, +}, +{ + "overlapping checks for direct packet access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_LWT_XMIT, +}, +{ + "make headroom for LWT_XMIT", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_2, 34), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_skb_change_head), + /* split for s390 to succeed */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 42), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_skb_change_head), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_LWT_XMIT, +}, +{ + "invalid access of tc_classid for LWT_IN", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", +}, +{ + "invalid access of tc_classid for LWT_OUT", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", +}, +{ + "invalid access of tc_classid for LWT_XMIT", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", +}, +{ + "check skb->tc_classid half load not permitted for lwt prog", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid)), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid) + 2), +#endif + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_LWT_IN, +}, diff --git a/tools/testing/selftests/bpf/verifier/map_in_map.c b/tools/testing/selftests/bpf/verifier/map_in_map.c new file mode 100644 index 000000000000..2798927ee9ff --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/map_in_map.c @@ -0,0 +1,62 @@ +{ + "map in map access", + .insns = { + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_in_map = { 3 }, + .result = ACCEPT, +}, +{ + "invalid inner map pointer", + .insns = { + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_in_map = { 3 }, + .errstr = "R1 pointer arithmetic on map_ptr prohibited", + .result = REJECT, +}, +{ + "forgot null checking on the inner map pointer", + .insns = { + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_ST_MEM(0, BPF_REG_10, -4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_in_map = { 3 }, + .errstr = "R1 type=map_value_or_null expected=map_ptr", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c new file mode 100644 index 000000000000..cd26ee6b7b1d --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c @@ -0,0 +1,100 @@ +{ + "calls: two calls returning different map pointers for lookup (hash, array)", + .insns = { + /* main prog */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_CALL_REL(11), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_CALL_REL(12), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* subprog 1 */ + BPF_LD_MAP_FD(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* subprog 2 */ + BPF_LD_MAP_FD(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_hash_48b = { 13 }, + .fixup_map_array_48b = { 16 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "calls: two calls returning different map pointers for lookup (hash, map in map)", + .insns = { + /* main prog */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_CALL_REL(11), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_CALL_REL(12), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* subprog 1 */ + BPF_LD_MAP_FD(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* subprog 2 */ + BPF_LD_MAP_FD(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_in_map = { 16 }, + .fixup_map_array_48b = { 13 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'map_ptr'", +}, +{ + "cond: two branches returning different map pointers for lookup (tail, tail)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 3), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 5 }, + .fixup_prog2 = { 2 }, + .result_unpriv = REJECT, + .errstr_unpriv = "tail_call abusing map_ptr", + .result = ACCEPT, + .retval = 42, +}, +{ + "cond: two branches returning same map pointers for lookup (tail, tail)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 3), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog2 = { 2, 5 }, + .result_unpriv = ACCEPT, + .result = ACCEPT, + .retval = 42, +}, diff --git a/tools/testing/selftests/bpf/verifier/map_ret_val.c b/tools/testing/selftests/bpf/verifier/map_ret_val.c new file mode 100644 index 000000000000..bdd0e8d18333 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/map_ret_val.c @@ -0,0 +1,65 @@ +{ + "invalid map_fd for function call", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem), + BPF_EXIT_INSN(), + }, + .errstr = "fd 0 is not pointing to valid bpf_map", + .result = REJECT, +}, +{ + "don't check return value before access", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "R0 invalid mem access 'map_value_or_null'", + .result = REJECT, +}, +{ + "access memory with incorrect alignment", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "misaligned value access", + .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, +}, +{ + "sometimes access memory with incorrect alignment", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "R0 invalid mem access", + .errstr_unpriv = "R0 leaks addr", + .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, +}, diff --git a/tools/testing/selftests/bpf/verifier/masking.c b/tools/testing/selftests/bpf/verifier/masking.c new file mode 100644 index 000000000000..6e1358c544fd --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/masking.c @@ -0,0 +1,322 @@ +{ + "masking, test out of bounds 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 5), + BPF_MOV32_IMM(BPF_REG_2, 5 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test out of bounds 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test out of bounds 3", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0xffffffff), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test out of bounds 4", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0xffffffff), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test out of bounds 5", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test out of bounds 6", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test out of bounds 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_MOV32_IMM(BPF_REG_2, 5 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test out of bounds 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test out of bounds 9", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0xffffffff), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test out of bounds 10", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0xffffffff), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test out of bounds 11", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test out of bounds 12", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test in bounds 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 4), + BPF_MOV32_IMM(BPF_REG_2, 5 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 4, +}, +{ + "masking, test in bounds 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test in bounds 3", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0xfffffffe), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0xfffffffe, +}, +{ + "masking, test in bounds 4", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0xabcde), + BPF_MOV32_IMM(BPF_REG_2, 0xabcdef - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0xabcde, +}, +{ + "masking, test in bounds 5", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "masking, test in bounds 6", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 46), + BPF_MOV32_IMM(BPF_REG_2, 47 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 46, +}, +{ + "masking, test in bounds 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, -46), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1), + BPF_MOV32_IMM(BPF_REG_2, 47 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 46, +}, +{ + "masking, test in bounds 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, -47), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1), + BPF_MOV32_IMM(BPF_REG_2, 47 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, diff --git a/tools/testing/selftests/bpf/verifier/meta_access.c b/tools/testing/selftests/bpf/verifier/meta_access.c new file mode 100644 index 000000000000..205292b8dd65 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/meta_access.c @@ -0,0 +1,235 @@ +{ + "meta access, test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "meta access, test2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet, off=-8", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "meta access, test3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "meta access, test4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "meta access, test5", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3), + BPF_MOV64_IMM(BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_xdp_adjust_meta), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R3 !read_ok", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "meta access, test6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "meta access, test7", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "meta access, test8", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "meta access, test9", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "meta access, test10", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_IMM(BPF_REG_5, 42), + BPF_MOV64_IMM(BPF_REG_6, 24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "meta access, test11", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_IMM(BPF_REG_5, 42), + BPF_MOV64_IMM(BPF_REG_6, 24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "meta access, test12", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, +}, diff --git a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c new file mode 100644 index 000000000000..471c1a5950d8 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c @@ -0,0 +1,59 @@ +{ + "check bpf_perf_event_data->sample_period byte load permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period)), +#else + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period) + 7), +#endif + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, +}, +{ + "check bpf_perf_event_data->sample_period half load permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period)), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period) + 6), +#endif + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, +}, +{ + "check bpf_perf_event_data->sample_period word load permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period)), +#else + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period) + 4), +#endif + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, +}, +{ + "check bpf_perf_event_data->sample_period dword load permitted", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_PERF_EVENT, +}, diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c new file mode 100644 index 000000000000..bbdba990fefb --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c @@ -0,0 +1,74 @@ +{ + "prevent map lookup in sockmap", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_sockmap = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem", + .prog_type = BPF_PROG_TYPE_SOCK_OPS, +}, +{ + "prevent map lookup in sockhash", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_sockhash = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem", + .prog_type = BPF_PROG_TYPE_SOCK_OPS, +}, +{ + "prevent map lookup in xskmap", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_xskmap = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem", + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "prevent map lookup in stack trace", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_stacktrace = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 7 into func bpf_map_lookup_elem", + .prog_type = BPF_PROG_TYPE_PERF_EVENT, +}, +{ + "prevent map lookup in prog array", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_prog2 = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 3 into func bpf_map_lookup_elem", +}, diff --git a/tools/testing/selftests/bpf/verifier/raw_stack.c b/tools/testing/selftests/bpf/verifier/raw_stack.c new file mode 100644 index 000000000000..193d9e87d5a9 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/raw_stack.c @@ -0,0 +1,305 @@ +{ + "raw_stack: no skb_load_bytes", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + /* Call to skb_load_bytes() omitted. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid read from stack off -8+0 size 8", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, negative len", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R4 min value is negative", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, negative len 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, ~0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R4 min value is negative", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, zero len", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, no init", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, init", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xcafe), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, spilled regs around bounds", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, + offsetof(struct __sk_buff, priority)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, spilled regs corruption", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "raw_stack: skb_load_bytes, spilled regs corruption 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, + offsetof(struct __sk_buff, priority)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_3, + offsetof(struct __sk_buff, pkt_type)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R3 invalid mem access 'inv'", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "raw_stack: skb_load_bytes, spilled regs + data", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, + offsetof(struct __sk_buff, priority)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, invalid access 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -513), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3 off=-513 access_size=8", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, invalid access 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3 off=-1 access_size=8", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, invalid access 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 0xffffffff), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R4 min value is negative", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, invalid access 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, invalid access 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, invalid access 6", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3 off=-512 access_size=0", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "raw_stack: skb_load_bytes, large access", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 512), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c new file mode 100644 index 000000000000..dc2cc823df2b --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -0,0 +1,607 @@ +{ + "reference tracking: leak potential reference", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, +}, +{ + "reference tracking: leak potential reference on stack", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, +}, +{ + "reference tracking: leak potential reference on stack 2", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, +}, +{ + "reference tracking: zero potential reference", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, +}, +{ + "reference tracking: copy and zero potential references", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, +}, +{ + "reference tracking: release reference without check", + .insns = { + BPF_SK_LOOKUP, + /* reference in r0 may be NULL */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=sock_or_null expected=sock", + .result = REJECT, +}, +{ + "reference tracking: release reference", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: release reference 2", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: release reference twice", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=inv expected=sock", + .result = REJECT, +}, +{ + "reference tracking: release reference twice inside branch", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */ + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=inv expected=sock", + .result = REJECT, +}, +{ + "reference tracking: alloc, check, free in one subbranch", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16), + /* if (offsetof(skb, mark) > data_len) exit; */ + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, + offsetof(struct __sk_buff, mark)), + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */ + /* Leak reference in R0 */ + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "reference tracking: alloc, check, free in both subbranches", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16), + /* if (offsetof(skb, mark) > data_len) exit; */ + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, + offsetof(struct __sk_buff, mark)), + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "reference tracking in call: free reference in subprog", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking in call: free reference in subprog and outside", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=inv expected=sock", + .result = REJECT, +}, +{ + "reference tracking in call: alloc & leak reference in subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_4), + BPF_SK_LOOKUP, + /* spill unchecked sk_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, +}, +{ + "reference tracking in call: alloc in subprog, release outside", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_SK_LOOKUP, + BPF_EXIT_INSN(), /* return sk */ + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = POINTER_VALUE, + .result = ACCEPT, +}, +{ + "reference tracking in call: sk_ptr leak into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + /* spill unchecked sk_ptr into stack of caller */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_SK_LOOKUP, + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, +}, +{ + "reference tracking in call: sk_ptr spill into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + /* spill unchecked sk_ptr into stack of caller */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* now the sk_ptr is verified, free the reference */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_4, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_SK_LOOKUP, + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: allow LD_ABS", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: forbid LD_ABS while holding reference", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references", + .result = REJECT, +}, +{ + "reference tracking: allow LD_IND", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, +}, +{ + "reference tracking: forbid LD_IND while holding reference", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references", + .result = REJECT, +}, +{ + "reference tracking: check reference or tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + BPF_SK_LOOKUP, + /* if (sk) bpf_sk_release() */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7), + /* bpf_tail_call() */ + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 17 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: release reference then tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + BPF_SK_LOOKUP, + /* if (sk) bpf_sk_release() */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + /* bpf_tail_call() */ + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 18 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: leak possible reference over tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + /* Look up socket and store in REG_6 */ + BPF_SK_LOOKUP, + /* bpf_tail_call() */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + /* if (sk) bpf_sk_release() */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 16 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "tail_call would lead to reference leak", + .result = REJECT, +}, +{ + "reference tracking: leak checked reference over tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + /* Look up socket and store in REG_6 */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* if (!sk) goto end */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + /* bpf_tail_call() */ + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 17 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "tail_call would lead to reference leak", + .result = REJECT, +}, +{ + "reference tracking: mangle and release sock_or_null", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R1 pointer arithmetic on sock_or_null prohibited", + .result = REJECT, +}, +{ + "reference tracking: mangle and release sock", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R1 pointer arithmetic on sock prohibited", + .result = REJECT, +}, +{ + "reference tracking: access member", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: write to member", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LD_IMM64(BPF_REG_2, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2, + offsetof(struct bpf_sock, mark)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "cannot write into socket", + .result = REJECT, +}, +{ + "reference tracking: invalid 64-bit access of member", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid bpf_sock access off=0 size=8", + .result = REJECT, +}, +{ + "reference tracking: access after release", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "!read_ok", + .result = REJECT, +}, +{ + "reference tracking: direct access for lookup", + .insns = { + /* Check that the packet is at least 64B long */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9), + /* sk = sk_lookup_tcp(ctx, skb->data, ...) */ + BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/runtime_jit.c b/tools/testing/selftests/bpf/verifier/runtime_jit.c new file mode 100644 index 000000000000..a9a8f620e71c --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/runtime_jit.c @@ -0,0 +1,80 @@ +{ + "runtime/jit: tail_call within bounds, prog once", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 1 }, + .result = ACCEPT, + .retval = 42, +}, +{ + "runtime/jit: tail_call within bounds, prog loop", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 1 }, + .result = ACCEPT, + .retval = 41, +}, +{ + "runtime/jit: tail_call within bounds, no prog", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 1 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "runtime/jit: tail_call out of bounds", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 256), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 1 }, + .result = ACCEPT, + .retval = 2, +}, +{ + "runtime/jit: pass negative index to tail_call", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, -1), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 1 }, + .result = ACCEPT, + .retval = 2, +}, +{ + "runtime/jit: pass > 32bit index to tail_call", + .insns = { + BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 2 }, + .result = ACCEPT, + .retval = 42, + /* Verifier rewrite for unpriv skips tail call here. */ + .retval_unpriv = 2, +}, diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c new file mode 100644 index 000000000000..7e50cb80873a --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/search_pruning.c @@ -0,0 +1,156 @@ +{ + "pointer/scalar confusion in state equality check (way 1)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_JMP_A(1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_JMP_A(0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + .retval = POINTER_VALUE, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 leaks addr as return value" +}, +{ + "pointer/scalar confusion in state equality check (way 2)", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_JMP_A(1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, + .retval = POINTER_VALUE, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 leaks addr as return value" +}, +{ + "liveness pruning and write screening", + .insns = { + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* branch conditions teach us nothing about R2 */ + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, +}, +{ + "varlen_map_value_access pruning", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), + BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .errstr = "R0 unbounded memory access", + .result_unpriv = REJECT, + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "search pruning: all branches should be verified (nop operation)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_A(1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "R6 invalid mem access 'inv'", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "search pruning: all branches should be verified (invalid stack access)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_JMP_A(1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "invalid read from stack off -16+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "allocated_stack", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, -8), + BPF_STX_MEM(BPF_B, BPF_REG_10, BPF_REG_7, -9), + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_10, -9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = ACCEPT, + .insn_processed = 15, +}, diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c new file mode 100644 index 000000000000..d58db72fdfe8 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -0,0 +1,75 @@ +{ + "check valid spill/fill", + .insns = { + /* spill R1(ctx) into stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + /* fill it back into R2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), + /* should be able to access R0 = *(R2 + 8) */ + /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R0 leaks addr", + .result = ACCEPT, + .result_unpriv = REJECT, + .retval = POINTER_VALUE, +}, +{ + "check valid spill/fill, skb mark", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = ACCEPT, +}, +{ + "check corrupted spill/fill", + .insns = { + /* spill R1(ctx) into stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + /* mess up with R1 pointer on stack */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23), + /* fill back into R0 is fine for priv. + * R0 now becomes SCALAR_VALUE. + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + /* Load from R0 should fail. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "attempt to corrupt spilled", + .errstr = "R0 invalid mem access 'inv", + .result = REJECT, +}, +{ + "check corrupted spill/fill, LSB", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_ST_MEM(BPF_H, BPF_REG_10, -8, 0xcafe), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "attempt to corrupt spilled", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, +}, +{ + "check corrupted spill/fill, MSB", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x12345678), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "attempt to corrupt spilled", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, +}, diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c new file mode 100644 index 000000000000..7276620ef242 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c @@ -0,0 +1,317 @@ +{ + "PTR_TO_STACK store/load", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0xfaceb00c, +}, +{ + "PTR_TO_STACK store/load - bad alignment on off", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8", +}, +{ + "PTR_TO_STACK store/load - bad alignment on reg", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8", +}, +{ + "PTR_TO_STACK store/load - out of bounds low", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off=-79992 size=8", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", +}, +{ + "PTR_TO_STACK store/load - out of bounds high", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off=0 size=8", +}, +{ + "PTR_TO_STACK check high 1", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, +}, +{ + "PTR_TO_STACK check high 2", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, +}, +{ + "PTR_TO_STACK check high 3", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 42, +}, +{ + "PTR_TO_STACK check high 4", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "invalid stack off=0 size=1", + .result = REJECT, +}, +{ + "PTR_TO_STACK check high 5", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off", +}, +{ + "PTR_TO_STACK check high 6", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), + BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off", +}, +{ + "PTR_TO_STACK check high 7", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), + BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "fp pointer offset", +}, +{ + "PTR_TO_STACK check low 1", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -512), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, +}, +{ + "PTR_TO_STACK check low 2", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513), + BPF_ST_MEM(BPF_B, BPF_REG_1, 1, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 1), + BPF_EXIT_INSN(), + }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .result = ACCEPT, + .retval = 42, +}, +{ + "PTR_TO_STACK check low 3", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "invalid stack off=-513 size=1", + .result = REJECT, +}, +{ + "PTR_TO_STACK check low 4", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, INT_MIN), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "math between fp pointer", +}, +{ + "PTR_TO_STACK check low 5", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off", +}, +{ + "PTR_TO_STACK check low 6", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), + BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off", +}, +{ + "PTR_TO_STACK check low 7", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), + BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "fp pointer offset", +}, +{ + "PTR_TO_STACK mixed reg/k, 1", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), + BPF_MOV64_IMM(BPF_REG_2, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, +}, +{ + "PTR_TO_STACK mixed reg/k, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), + BPF_MOV64_IMM(BPF_REG_2, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_5, -6), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, +}, +{ + "PTR_TO_STACK mixed reg/k, 3", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), + BPF_MOV64_IMM(BPF_REG_2, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = -3, +}, +{ + "PTR_TO_STACK reg", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_MOV64_IMM(BPF_REG_2, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid stack off=0 size=1", + .result = ACCEPT, + .retval = 42, +}, +{ + "stack pointer arithmetic", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1), + BPF_ST_MEM(0, BPF_REG_2, 4, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_ST_MEM(0, BPF_REG_2, 4, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/uninit.c b/tools/testing/selftests/bpf/verifier/uninit.c new file mode 100644 index 000000000000..987a5871ff1d --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/uninit.c @@ -0,0 +1,39 @@ +{ + "read uninitialized register", + .insns = { + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R2 !read_ok", + .result = REJECT, +}, +{ + "read invalid register", + .insns = { + BPF_MOV64_REG(BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .errstr = "R15 is invalid", + .result = REJECT, +}, +{ + "program doesn't init R0 before exit", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, +}, +{ + "program doesn't init R0 before exit in all branches", + .insns = { + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .errstr_unpriv = "R1 pointer comparison", + .result = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c new file mode 100644 index 000000000000..dca58cf1a4ab --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -0,0 +1,521 @@ +{ + "unpriv: return pointer", + .insns = { + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 leaks addr", + .retval = POINTER_VALUE, +}, +{ + "unpriv: add const to pointer", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "unpriv: add pointer to pointer", + .insns = { + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R1 pointer += pointer", +}, +{ + "unpriv: neg pointer", + .insns = { + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 pointer arithmetic", +}, +{ + "unpriv: cmp pointer with const", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 pointer comparison", +}, +{ + "unpriv: cmp pointer with pointer", + .insns = { + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R10 pointer comparison", +}, +{ + "unpriv: check that printk is disallowed", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_trace_printk), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "unknown func bpf_trace_printk#6", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "unpriv: pass pointer to helper function", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr_unpriv = "R4 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "unpriv: indirectly pass pointer on stack to helper function", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "invalid indirect read from stack off -8+0 size 8", + .result = REJECT, +}, +{ + "unpriv: mangle pointer on stack 1", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "attempt to corrupt spilled", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "unpriv: mangle pointer on stack 2", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8), + BPF_ST_MEM(BPF_B, BPF_REG_10, -1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "attempt to corrupt spilled", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "unpriv: read pointer from stack in small chunks", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid size", + .result = REJECT, +}, +{ + "unpriv: write pointer into ctx", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 leaks addr", + .result_unpriv = REJECT, + .errstr = "invalid bpf_context access", + .result = REJECT, +}, +{ + "unpriv: spill/fill of ctx", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "unpriv: spill/fill of ctx 2", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "unpriv: spill/fill of ctx 3", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R1 type=fp expected=ctx", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "unpriv: spill/fill of ctx 4", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_10, BPF_REG_0, -8, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R1 type=inv expected=ctx", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "unpriv: spill/fill of different pointers stx", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct __sk_buff, mark)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "same insn cannot be used with different pointers", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "unpriv: spill/fill of different pointers stx - ctx and sock", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb == NULL) *target = sock; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* else *target = skb; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* struct __sk_buff *skb = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* skb->mark = 42; */ + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct __sk_buff, mark)), + /* if (sk) bpf_sk_release(sk) */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "type=ctx expected=sock", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "unpriv: spill/fill of different pointers stx - leak sock", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb == NULL) *target = sock; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* else *target = skb; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* struct __sk_buff *skb = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* skb->mark = 42; */ + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + //.errstr = "same insn cannot be used with different pointers", + .errstr = "Unreleased reference", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "unpriv: spill/fill of different pointers stx - sock and ctx (read)", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb) *target = skb */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* else *target = sock */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* struct bpf_sock *sk = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct bpf_sock, mark)), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "same insn cannot be used with different pointers", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "unpriv: spill/fill of different pointers stx - sock and ctx (write)", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb) *target = skb */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* else *target = sock */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* struct bpf_sock *sk = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* if (sk) sk->mark = 42; bpf_sk_release(sk); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, mark)), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + //.errstr = "same insn cannot be used with different pointers", + .errstr = "cannot write into socket", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "unpriv: spill/fill of different pointers ldx", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, + -(__s32)offsetof(struct bpf_perf_event_data, + sample_period) - 8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, + offsetof(struct bpf_perf_event_data, sample_period)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "same insn cannot be used with different pointers", + .prog_type = BPF_PROG_TYPE_PERF_EVENT, +}, +{ + "unpriv: write pointer into map elem value", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "alu32: mov u32 const", + .insns = { + BPF_MOV32_IMM(BPF_REG_7, 0), + BPF_ALU32_IMM(BPF_AND, BPF_REG_7, 1), + BPF_MOV32_REG(BPF_REG_0, BPF_REG_7), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, +{ + "unpriv: partial copy of pointer", + .insns = { + BPF_MOV32_REG(BPF_REG_1, BPF_REG_10), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R10 partial copy", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "unpriv: pass pointer to tail_call", + .insns = { + BPF_MOV64_REG(BPF_REG_3, BPF_REG_1), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 1 }, + .errstr_unpriv = "R3 leaks addr into helper", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "unpriv: cmp map pointer with zero", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1 }, + .errstr_unpriv = "R1 pointer comparison", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "unpriv: write into frame pointer", + .insns = { + BPF_MOV64_REG(BPF_REG_10, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "frame pointer is read only", + .result = REJECT, +}, +{ + "unpriv: spill/fill frame pointer", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "frame pointer is read only", + .result = REJECT, +}, +{ + "unpriv: cmp of frame pointer", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R10 pointer comparison", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "unpriv: adding of fp", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "unpriv: cmp of stack pointer", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R2 pointer comparison", + .result_unpriv = REJECT, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/value.c b/tools/testing/selftests/bpf/verifier/value.c new file mode 100644 index 000000000000..0e42592b1218 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/value.c @@ -0,0 +1,104 @@ +{ + "map element value store of cleared call register", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R1 !read_ok", + .errstr = "R1 !read_ok", + .result = REJECT, + .result_unpriv = REJECT, +}, +{ + "map element value with unaligned store", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43), + BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33), + BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23), + BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22), + BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23), + BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result = ACCEPT, + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "map element value with unaligned load", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result = ACCEPT, + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "map element value is preserved across register spilling", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, offsetof(struct test_val, foo)), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result = ACCEPT, + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/bpf/verifier/value_adj_spill.c b/tools/testing/selftests/bpf/verifier/value_adj_spill.c new file mode 100644 index 000000000000..7135e8021b81 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/value_adj_spill.c @@ -0,0 +1,43 @@ +{ + "map element value is preserved across register spilling", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result = ACCEPT, + .result_unpriv = REJECT, +}, +{ + "map element value or null is marked on register spilling", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result = ACCEPT, + .result_unpriv = REJECT, +}, diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c new file mode 100644 index 000000000000..7f6c232cd842 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c @@ -0,0 +1,94 @@ +{ + "map element value illegal alu op, 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R0 bitwise operator &= on pointer", + .result = REJECT, +}, +{ + "map element value illegal alu op, 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R0 32-bit pointer arithmetic prohibited", + .result = REJECT, +}, +{ + "map element value illegal alu op, 3", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R0 pointer arithmetic with /= operator", + .result = REJECT, +}, +{ + "map element value illegal alu op, 4", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "map element value illegal alu op, 5", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_IMM(BPF_REG_3, 4096), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R0 invalid mem access 'inv'", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c new file mode 100644 index 000000000000..860d4a71cd83 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/value_or_null.c @@ -0,0 +1,152 @@ +{ + "multiple registers share map_lookup_elem result", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS +}, +{ + "alu ops on ptr_to_map_value_or_null, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 4 }, + .errstr = "R4 pointer arithmetic on map_value_or_null", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS +}, +{ + "alu ops on ptr_to_map_value_or_null, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 4 }, + .errstr = "R4 pointer arithmetic on map_value_or_null", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS +}, +{ + "alu ops on ptr_to_map_value_or_null, 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 4 }, + .errstr = "R4 pointer arithmetic on map_value_or_null", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS +}, +{ + "invalid memory access with multiple map_lookup_elem calls", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 4 }, + .result = REJECT, + .errstr = "R4 !read_ok", + .prog_type = BPF_PROG_TYPE_SCHED_CLS +}, +{ + "valid indirect map_lookup_elem access with 2nd lookup in branch", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_2, 10), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS +}, +{ + "invalid map access from else condition", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES-1, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 3 }, + .errstr = "R0 unbounded memory access", + .result = REJECT, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c new file mode 100644 index 000000000000..9ab5ace83e02 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -0,0 +1,792 @@ +{ + "map access: known scalar += value_ptr from different maps", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps", + .retval = 1, +}, +{ + "map access: value_ptr -= known scalar from different maps", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 min value is outside of the array range", + .retval = 1, +}, +{ + "map access: known scalar += value_ptr from different maps, but same value properties", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: mixing value pointer and scalar, 1", + .insns = { + // load map value pointer into r0 and r2 + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + // load some number from the map into r1 + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + // depending on r1, branch: + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 3), + // branch A + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_A(2), + // branch B + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0x100000), + // common instruction + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + // depending on r1, branch: + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + // branch A + BPF_JMP_A(4), + // branch B + BPF_MOV64_IMM(BPF_REG_0, 0x13371337), + // verifier follows fall-through + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0x100000, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + // fake-dead code; targeted from branch A to + // prevent dead code sanitization + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R2 tried to add from different pointers or scalars", + .retval = 0, +}, +{ + "map access: mixing value pointer and scalar, 2", + .insns = { + // load map value pointer into r0 and r2 + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + // load some number from the map into r1 + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + // depending on r1, branch: + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + // branch A + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0x100000), + BPF_JMP_A(2), + // branch B + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 0), + // common instruction + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + // depending on r1, branch: + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + // branch A + BPF_JMP_A(4), + // branch B + BPF_MOV64_IMM(BPF_REG_0, 0x13371337), + // verifier follows fall-through + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0x100000, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + // fake-dead code; targeted from branch A to + // prevent dead code sanitization + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R2 tried to add from different maps or paths", + .retval = 0, +}, +{ + "sanitation: alu with different scalars", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0x100000), + BPF_JMP_A(2), + BPF_MOV64_IMM(BPF_REG_2, 42), + BPF_MOV64_IMM(BPF_REG_3, 0x100001), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .retval = 0x100000, +}, +{ + "map access: value_ptr += known scalar, upper oob arith, test 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 48), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, +}, +{ + "map access: value_ptr += known scalar, upper oob arith, test 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 49), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, +}, +{ + "map access: value_ptr += known scalar, upper oob arith, test 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, +}, +{ + "map access: value_ptr -= known scalar, lower oob arith, test 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 48), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 min value is outside of the array range", + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", +}, +{ + "map access: value_ptr -= known scalar, lower oob arith, test 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 48), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, +}, +{ + "map access: value_ptr -= known scalar, lower oob arith, test 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, +}, +{ + "map access: known scalar += value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: value_ptr += known scalar, 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: value_ptr += known scalar, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 49), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", +}, +{ + "map access: value_ptr += known scalar, 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", +}, +{ + "map access: value_ptr += known scalar, 4", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, -2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, +}, +{ + "map access: value_ptr += known scalar, 5", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, (6 + 1) * sizeof(int)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0xabcdef12, +}, +{ + "map access: value_ptr += known scalar, 6", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_1, (3 + 1) * sizeof(int)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 3 * sizeof(int)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0xabcdef12, +}, +{ + "map access: unknown scalar += value_ptr, 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: unknown scalar += value_ptr, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0xabcdef12, +}, +{ + "map access: unknown scalar += value_ptr, 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 0xabcdef12, +}, +{ + "map access: unknown scalar += value_ptr, 4", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_IMM(BPF_REG_1, 19), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R1 max value is outside of the array range", + .errstr_unpriv = "R1 pointer arithmetic of map value goes out of range", +}, +{ + "map access: value_ptr += unknown scalar, 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: value_ptr += unknown scalar, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0xabcdef12, +}, +{ + "map access: value_ptr += unknown scalar, 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 16), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_OR, BPF_REG_3, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_3, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: value_ptr += value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 pointer += pointer prohibited", +}, +{ + "map access: known scalar -= value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R1 tried to subtract pointer from scalar", +}, +{ + "map access: value_ptr -= known scalar", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 min value is outside of the array range", +}, +{ + "map access: value_ptr -= known scalar, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, +}, +{ + "map access: unknown scalar -= value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R1 tried to subtract pointer from scalar", +}, +{ + "map access: value_ptr -= unknown scalar", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 min value is negative", +}, +{ + "map access: value_ptr -= unknown scalar, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_IMM(BPF_OR, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, +}, +{ + "map access: value_ptr -= value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + .errstr_unpriv = "R0 pointer -= pointer prohibited", +}, diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c new file mode 100644 index 000000000000..1e536ff121a5 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/var_off.c @@ -0,0 +1,66 @@ +{ + "variable-offset ctx access", + .insns = { + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + /* add it to skb. We now have either &skb->len or + * &skb->pkt_type, but we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + /* dereference it */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr = "variable ctx access var_off=(0x0; 0x4)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, +}, +{ + "variable-offset stack access", + .insns = { + /* Fill the top 8 bytes of the stack */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), + /* add it to fp. We now have either fp-4 or fp-8, but + * we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* dereference it */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_EXIT_INSN(), + }, + .errstr = "variable stack access var_off=(0xfffffffffffffff8; 0x4)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, +}, +{ + "indirect variable-offset stack access", + .insns = { + /* Fill the top 8 bytes of the stack */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), + /* add it to fp. We now have either fp-4 or fp-8, but + * we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* dereference it indirectly */ + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 5 }, + .errstr = "variable stack read R2", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, +}, diff --git a/tools/testing/selftests/bpf/verifier/xadd.c b/tools/testing/selftests/bpf/verifier/xadd.c new file mode 100644 index 000000000000..c5de2e62cc8b --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/xadd.c @@ -0,0 +1,97 @@ +{ + "xadd/w check unaligned stack", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "misaligned stack access off", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "xadd/w check unaligned map", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = REJECT, + .errstr = "misaligned value access off", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ + "xadd/w check unaligned pkt", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 99), + BPF_JMP_IMM(BPF_JA, 0, 0, 6), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0), + BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1), + BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "BPF_XADD stores into R2 pkt is not allowed", + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "xadd/w check whether src/dst got mangled, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3), + BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 3, +}, +{ + "xadd/w check whether src/dst got mangled, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8), + BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8), + BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8), + BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3), + BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 3, +}, diff --git a/tools/testing/selftests/bpf/verifier/xdp.c b/tools/testing/selftests/bpf/verifier/xdp.c new file mode 100644 index 000000000000..5ac390508139 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/xdp.c @@ -0,0 +1,14 @@ +{ + "XDP, using ifindex from netdev", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, ingress_ifindex)), + BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .retval = 1, +}, diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c new file mode 100644 index 000000000000..bfb97383e6b5 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c @@ -0,0 +1,900 @@ +{ + "XDP pkt read, pkt_end mangling, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R3 pointer arithmetic on pkt_end", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "XDP pkt read, pkt_end mangling, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R3 pointer arithmetic on pkt_end", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, +}, +{ + "XDP pkt read, pkt_data' > pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' > pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' > pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end > pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end > pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end > pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' < pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' < pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' < pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' >= pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' >= pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' >= pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end <= pkt_data', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end <= pkt_data', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end <= pkt_data', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data > pkt_meta', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data > pkt_meta', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data > pkt_meta', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' < pkt_data, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' < pkt_data, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' < pkt_data, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' >= pkt_data, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' >= pkt_data, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' >= pkt_data, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data <= pkt_meta', good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data <= pkt_meta', bad access 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data <= pkt_meta', bad access 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 94fdbf215c14..c4cf6e6d800e 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -25,6 +25,7 @@ ALL_TESTS=" lag_unlink_slaves_test lag_dev_deletion_test vlan_interface_uppers_test + bridge_extern_learn_test devlink_reload_test " NUM_NETIFS=2 @@ -541,6 +542,25 @@ vlan_interface_uppers_test() ip link del dev br0 } +bridge_extern_learn_test() +{ + # Test that externally learned entries added from user space are + # marked as offloaded + RET=0 + + ip link add name br0 type bridge + ip link set dev $swp1 master br0 + + bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn + + bridge fdb show brport $swp1 | grep de:ad:be:ef:13:37 | grep -q offload + check_err $? "fdb entry not marked as offloaded when should" + + log_test "externally learned fdb entry" + + ip link del dev br0 +} + devlink_reload_test() { # Test that after executing all the above configuration tests, a diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index b41d6256b2d0..f1922bf597b0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -9,7 +9,8 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ - bloom_simple_test bloom_complex_test bloom_delta_test" + delta_two_masks_one_key_test bloom_simple_test \ + bloom_complex_test bloom_delta_test" NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh @@ -38,6 +39,55 @@ h2_destroy() simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24 } +tp_record() +{ + local tracepoint=$1 + local cmd=$2 + + perf record -q -e $tracepoint $cmd + return $? +} + +tp_record_all() +{ + local tracepoint=$1 + local seconds=$2 + + perf record -a -q -e $tracepoint sleep $seconds + return $? +} + +__tp_hit_count() +{ + local tracepoint=$1 + + local perf_output=`perf script -F trace:event,trace` + return `echo $perf_output | grep "$tracepoint:" | wc -l` +} + +tp_check_hits() +{ + local tracepoint=$1 + local count=$2 + + __tp_hit_count $tracepoint + if [[ "$?" -ne "$count" ]]; then + return 1 + fi + return 0 +} + +tp_check_hits_any() +{ + local tracepoint=$1 + + __tp_hit_count $tracepoint + if [[ "$?" -eq "0" ]]; then + return 1 + fi + return 0 +} + single_mask_test() { # When only a single mask is required, the device uses the master @@ -182,20 +232,38 @@ multiple_masks_test() # spillage is performed correctly and that the right filter is # matched + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + local index RET=0 NUM_MASKS=32 + NUM_ERPS=16 BASE_INDEX=100 for i in $(eval echo {1..$NUM_MASKS}); do index=$((BASE_INDEX - i)) - tc filter add dev $h2 ingress protocol ip pref $index \ - handle $index \ - flower $tcflags dst_ip 192.0.2.2/${i} src_ip 192.0.2.1 \ - action drop + if ((i > NUM_ERPS)); then + exp_hits=1 + err_msg="$i filters - C-TCAM spill did not happen when it was expected" + else + exp_hits=0 + err_msg="$i filters - C-TCAM spill happened when it should not" + fi + + tp_record "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" \ + "tc filter add dev $h2 ingress protocol ip pref $index \ + handle $index \ + flower $tcflags \ + dst_ip 192.0.2.2/${i} src_ip 192.0.2.1/${i} \ + action drop" + tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" \ + $exp_hits + check_err $? "$err_msg" $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \ -B 192.0.2.2 -t ip -q @@ -325,28 +393,6 @@ ctcam_edge_cases_test() ctcam_no_atcam_masks_test } -tp_record() -{ - local tracepoint=$1 - local cmd=$2 - - perf record -q -e $tracepoint $cmd - return $? -} - -tp_check_hits() -{ - local tracepoint=$1 - local count=$2 - - perf_output=`perf script -F trace:event,trace` - hits=`echo $perf_output | grep "$tracepoint:" | wc -l` - if [[ "$count" -ne "$hits" ]]; then - return 1 - fi - return 0 -} - delta_simple_test() { # The first filter will create eRP, the second filter will fit into @@ -405,6 +451,49 @@ delta_simple_test() log_test "delta simple test ($tcflags)" } +delta_two_masks_one_key_test() +{ + # If 2 keys are the same and only differ in mask in a way that + # they belong under the same ERP (second is delta of the first), + # there should be no C-TCAM spill. + + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \ + pref 1 handle 101 flower $tcflags dst_ip 192.0.2.0/24 \ + action drop" + tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 0 + check_err $? "incorrect C-TCAM spill while inserting the first rule" + + tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \ + pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \ + action drop" + tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 0 + check_err $? "incorrect C-TCAM spill while inserting the second rule" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "delta two masks one key test ($tcflags)" +} + bloom_simple_test() { # Bloom filter requires that the eRP table is used. This test diff --git a/tools/testing/selftests/gpio/gpio-mockup-chardev.c b/tools/testing/selftests/gpio/gpio-mockup-chardev.c index f8d468f54e98..aaa1e9f083c3 100644 --- a/tools/testing/selftests/gpio/gpio-mockup-chardev.c +++ b/tools/testing/selftests/gpio/gpio-mockup-chardev.c @@ -37,7 +37,7 @@ static int get_debugfs(char **path) struct libmnt_table *tb; struct libmnt_iter *itr = NULL; struct libmnt_fs *fs; - int found = 0; + int found = 0, ret; cxt = mnt_new_context(); if (!cxt) @@ -58,8 +58,11 @@ static int get_debugfs(char **path) break; } } - if (found) - asprintf(path, "%s/gpio", mnt_fs_get_target(fs)); + if (found) { + ret = asprintf(path, "%s/gpio", mnt_fs_get_target(fs)); + if (ret < 0) + err(EXIT_FAILURE, "failed to format string"); + } mnt_free_iter(itr); mnt_free_context(cxt); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 23022e9d32eb..b52cfdefecbf 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -571,7 +571,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, * already exist. */ region = (struct userspace_mem_region *) userspace_mem_region_find( - vm, guest_paddr, guest_paddr + npages * vm->page_size); + vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); if (region != NULL) TEST_ASSERT(false, "overlapping userspace_mem_region already " "exists\n" @@ -587,15 +587,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, region = region->next) { if (region->region.slot == slot) break; - if ((guest_paddr <= (region->region.guest_phys_addr - + region->region.memory_size)) - && ((guest_paddr + npages * vm->page_size) - >= region->region.guest_phys_addr)) - break; } if (region != NULL) TEST_ASSERT(false, "A mem region with the requested slot " - "or overlapping physical memory range already exists.\n" + "already exists.\n" " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" " existing slot: %u paddr: 0x%lx size: 0x%lx", slot, guest_paddr, npages, diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index ea3c73e8f4f6..c49c2a28b0eb 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -103,6 +103,12 @@ int main(int argc, char *argv[]) vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); + /* KVM should return supported EVMCS version range */ + TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && + (evmcs_ver & 0xff) > 0, + "Incorrect EVMCS version range: %x:%x\n", + evmcs_ver & 0xff, evmcs_ver >> 8); + run = vcpu_state(vm, VCPU_ID); vcpu_regs_get(vm, VCPU_ID, ®s1); diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 04c6431b2bd8..b90dff8d3a94 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -109,6 +109,38 @@ vlan_deletion() ping_ipv6 } +extern_learn() +{ + local mac=de:ad:be:ef:13:37 + local ageing_time + + # Test that externally learned FDB entries can roam, but not age out + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn vlan 1 + + bridge fdb show brport $swp1 | grep -q de:ad:be:ef:13:37 + check_err $? "Did not find FDB entry when should" + + # Wait for 10 seconds after the ageing time to make sure the FDB entry + # was not aged out + ageing_time=$(bridge_ageing_time_get br0) + sleep $((ageing_time + 10)) + + bridge fdb show brport $swp1 | grep -q de:ad:be:ef:13:37 + check_err $? "FDB entry was aged out when should not" + + $MZ $h2 -c 1 -p 64 -a $mac -t ip -q + + bridge fdb show brport $swp2 | grep -q de:ad:be:ef:13:37 + check_err $? "FDB entry did not roam when should" + + log_test "Externally learned FDB entry - ageing & roaming" + + bridge fdb del de:ad:be:ef:13:37 dev $swp2 master vlan 1 &> /dev/null + bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 5ab1e5f43022..57cf8914910d 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -32,7 +32,7 @@ DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \ ############################################################################## # Sanity checks -devlink -j resource show "$DEVLINK_DEV" &> /dev/null +devlink help 2>&1 | grep resource &> /dev/null if [ $? -ne 0 ]; then echo "SKIP: iproute2 too old, missing devlink resource support" exit 1 diff --git a/tools/testing/selftests/net/forwarding/ipip_flat_gre.sh b/tools/testing/selftests/net/forwarding/ipip_flat_gre.sh new file mode 100755 index 000000000000..abb694397b86 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_flat_gre.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel without key. +# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more +# details. + +ALL_TESTS="gre_flat4 gre_mtu_change" + +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create gre $ol1 $ul1 + sw2_flat_create gre $ol2 $ul2 +} + +gre_flat4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre flat" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh b/tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh new file mode 100755 index 000000000000..c4f373337e48 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_flat_gre_key.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel with key. +# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more +# details. + +ALL_TESTS="gre_flat4 gre_mtu_change" + +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create gre $ol1 $ul1 key 233 + sw2_flat_create gre $ol2 $ul2 key 233 +} + +gre_flat4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre flat with key" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh b/tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh new file mode 100755 index 000000000000..a811130c0627 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_flat_gre_keys.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel with key. +# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more +# details. + +ALL_TESTS="gre_flat4 gre_mtu_change" + +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create gre $ol1 $ul1 ikey 111 okey 222 + sw2_flat_create gre $ol2 $ul2 ikey 222 okey 111 +} + +gre_flat4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre flat with ikey/okey" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_hier_gre.sh b/tools/testing/selftests/net/forwarding/ipip_hier_gre.sh new file mode 100755 index 000000000000..05c5b3cf2f78 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_hier_gre.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ipip_lib.sh for more details. + +ALL_TESTS="gre_hier4 gre_mtu_change" + +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create gre $ol1 $ul1 + sw2_hierarchical_create gre $ol2 $ul2 +} + +gre_hier4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre hierarchical" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh b/tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh new file mode 100755 index 000000000000..9b105dbca32a --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_hier_gre_key.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ipip_lib.sh for more details. + +ALL_TESTS="gre_hier4 gre_mtu_change" + +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create gre $ol1 $ul1 key 22 + sw2_hierarchical_create gre $ol2 $ul2 key 22 +} + +gre_hier4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre hierarchical with key" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh b/tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh new file mode 100755 index 000000000000..e275d25bd83a --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_hier_gre_keys.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ipip_lib.sh for more details. + +ALL_TESTS="gre_hier4 gre_mtu_change" + +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create gre $ol1 $ul1 ikey 111 okey 222 + sw2_hierarchical_create gre $ol2 $ul2 ikey 222 okey 111 +} + +gre_hier4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre hierarchical with ikey/okey" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh new file mode 100644 index 000000000000..30f36a57bae6 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh @@ -0,0 +1,349 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Handles creation and destruction of IP-in-IP or GRE tunnels over the given +# topology. Supports both flat and hierarchical models. +# +# Flat Model: +# Overlay and underlay share the same VRF. +# SW1 uses default VRF so tunnel has no bound dev. +# SW2 uses non-default VRF tunnel has a bound dev. +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# +-------------------|-----+ +# | +# +-------------------|-----+ +# | SW1 | | +# | $ol1 + | +# | 192.0.2.2/28 | +# | | +# | + g1a (gre) | +# | loc=192.0.2.65 | +# | rem=192.0.2.66 --. | +# | tos=inherit | | +# | .------------------' | +# | | | +# | v | +# | + $ul1.111 (vlan) | +# | | 192.0.2.129/28 | +# | \ | +# | \_______ | +# | | | +# |VRF default + $ul1 | +# +------------|------------+ +# | +# +------------|------------+ +# | SW2 + $ul2 | +# | _______| | +# | / | +# | / | +# | + $ul2.111 (vlan) | +# | ^ 192.0.2.130/28 | +# | | | +# | | | +# | '------------------. | +# | + g2a (gre) | | +# | loc=192.0.2.66 | | +# | rem=192.0.2.65 --' | +# | tos=inherit | +# | | +# | $ol2 + | +# | 192.0.2.17/28 | | +# | VRF v$ol2 | | +# +-------------------|-----+ +# | +# +-------------------|-----+ +# | H2 | | +# | $h2 + | +# | 192.0.2.18/28 | +# +-------------------------+ +# +# Hierarchical model: +# The tunnel is bound to a device in a different VRF +# +# +---------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# +-------------------|-------+ +# | +# +-------------------|-------+ +# | SW1 | | +# | +-----------------|-----+ | +# | | $ol1 + | | +# | | 192.0.2.2/28 | | +# | | | | +# | | + g1a (gre) | | +# | | rem=192.0.2.66 | | +# | | tos=inherit | | +# | | loc=192.0.2.65 | | +# | | ^ | | +# | | VRF v$ol1 | | | +# | +-----------|-----------+ | +# | | | +# | +-----------|-----------+ | +# | | VRF v$ul1 | | | +# | | | | | +# | | | | | +# | | v | | +# | | dummy1 + | | +# | | 192.0.2.65 | | +# | | .-------' | | +# | | | | | +# | | v | | +# | | + $ul1.111 (vlan) | | +# | | | 192.0.2.129/28 | | +# | | \ | | +# | | \_____ | | +# | | | | | +# | | + $ul1 | | +# | +----------|------------+ | +# +------------|--------------+ +# | +# +------------|--------------+ +# | SW2 | | +# | +----------|------------+ | +# | | + $ul2 | | +# | | _____| | | +# | | / | | +# | | / | | +# | | | $ul2.111 (vlan) | | +# | | + 192.0.2.130/28 | | +# | | ^ | | +# | | | | | +# | | '-------. | | +# | | dummy2 + | | +# | | 192.0.2.66 | | +# | | ^ | | +# | | | | | +# | | | | | +# | | VRF v$ul2 | | | +# | +-----------|-----------+ | +# | | | +# | +-----------|-----------+ | +# | | VRF v$ol2 | | | +# | | | | | +# | | v | | +# | | g2a (gre)+ | | +# | | loc=192.0.2.66 | | +# | | rem=192.0.2.65 | | +# | | tos=inherit | | +# | | | | +# | | $ol2 + | | +# | | 192.0.2.17/28 | | | +# | +-----------------|-----+ | +# +-------------------|-------+ +# | +# +-------------------|-------+ +# | H2 | | +# | $h2 + | +# | 192.0.2.18/28 | +# +---------------------------+ +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 +} + +h1_destroy() +{ + ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.18/28 + ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17 +} + +h2_destroy() +{ + ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17 + simple_if_fini $h2 192.0.2.18/28 +} + +sw1_flat_create() +{ + local type=$1; shift + local ol1=$1; shift + local ul1=$1; shift + + ip link set dev $ol1 up + __addr_add_del $ol1 add "192.0.2.2/28" + + ip link set dev $ul1 up + vlan_create $ul1 111 "" 192.0.2.129/28 + + tunnel_create g1a $type 192.0.2.65 192.0.2.66 tos inherit "$@" + ip link set dev g1a up + __addr_add_del g1a add "192.0.2.65/32" + + ip route add 192.0.2.66/32 via 192.0.2.130 + + ip route add 192.0.2.16/28 nexthop dev g1a +} + +sw1_flat_destroy() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip route del 192.0.2.16/28 + + ip route del 192.0.2.66/32 via 192.0.2.130 + __simple_if_fini g1a 192.0.2.65/32 + tunnel_destroy g1a + + vlan_destroy $ul1 111 + __simple_if_fini $ul1 + __simple_if_fini $ol1 192.0.2.2/28 +} + +sw2_flat_create() +{ + local type=$1; shift + local ol2=$1; shift + local ul2=$1; shift + + simple_if_init $ol2 192.0.2.17/28 + __simple_if_init $ul2 v$ol2 + vlan_create $ul2 111 v$ol2 192.0.2.130/28 + + tunnel_create g2a $type 192.0.2.66 192.0.2.65 tos inherit dev v$ol2 \ + "$@" + __simple_if_init g2a v$ol2 192.0.2.66/32 + + ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + ip route add vrf v$ol2 192.0.2.0/28 nexthop dev g2a +} + +sw2_flat_destroy() +{ + local ol2=$1; shift + local ul2=$1; shift + + ip route del vrf v$ol2 192.0.2.0/28 + + ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + __simple_if_fini g2a 192.0.2.66/32 + tunnel_destroy g2a + + vlan_destroy $ul2 111 + __simple_if_fini $ul2 + simple_if_fini $ol2 192.0.2.17/28 +} + +sw1_hierarchical_create() +{ + local type=$1; shift + local ol1=$1; shift + local ul1=$1; shift + + simple_if_init $ol1 192.0.2.2/28 + simple_if_init $ul1 + ip link add name dummy1 type dummy + __simple_if_init dummy1 v$ul1 192.0.2.65/32 + + vlan_create $ul1 111 v$ul1 192.0.2.129/28 + tunnel_create g1a $type 192.0.2.65 192.0.2.66 tos inherit dev dummy1 \ + "$@" + ip link set dev g1a master v$ol1 + + ip route add vrf v$ul1 192.0.2.66/32 via 192.0.2.130 + ip route add vrf v$ol1 192.0.2.16/28 nexthop dev g1a +} + +sw1_hierarchical_destroy() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip route del vrf v$ol1 192.0.2.16/28 + ip route del vrf v$ul1 192.0.2.66/32 + + tunnel_destroy g1a + vlan_destroy $ul1 111 + + __simple_if_fini dummy1 192.0.2.65/32 + ip link del dev dummy1 + + simple_if_fini $ul1 + simple_if_fini $ol1 192.0.2.2/28 +} + +sw2_hierarchical_create() +{ + local type=$1; shift + local ol2=$1; shift + local ul2=$1; shift + + simple_if_init $ol2 192.0.2.17/28 + simple_if_init $ul2 + + ip link add name dummy2 type dummy + __simple_if_init dummy2 v$ul2 192.0.2.66/32 + + vlan_create $ul2 111 v$ul2 192.0.2.130/28 + tunnel_create g2a $type 192.0.2.66 192.0.2.65 tos inherit dev dummy2 \ + "$@" + ip link set dev g2a master v$ol2 + + ip route add vrf v$ul2 192.0.2.65/32 via 192.0.2.129 + ip route add vrf v$ol2 192.0.2.0/28 nexthop dev g2a +} + +sw2_hierarchical_destroy() +{ + local ol2=$1; shift + local ul2=$1; shift + + ip route del vrf v$ol2 192.0.2.0/28 + ip route del vrf v$ul2 192.0.2.65/32 + + tunnel_destroy g2a + vlan_destroy $ul2 111 + + __simple_if_fini dummy2 192.0.2.66/32 + ip link del dev dummy2 + + simple_if_fini $ul2 + simple_if_fini $ol2 192.0.2.17/28 +} + +topo_mtu_change() +{ + local mtu=$1 + + ip link set mtu $mtu dev $h1 + ip link set mtu $mtu dev $ol1 + ip link set mtu $mtu dev g1a + ip link set mtu $mtu dev $ul1 + ip link set mtu $mtu dev $ul1.111 + ip link set mtu $mtu dev $h2 + ip link set mtu $mtu dev $ol2 + ip link set mtu $mtu dev g2a + ip link set mtu $mtu dev $ul2 + ip link set mtu $mtu dev $ul2.111 +} + +test_mtu_change() +{ + local encap=$1; shift + + RET=0 + + ping_do $h1 192.0.2.18 "-s 1800 -w 3" + check_fail $? "ping $encap should not pass with size 1800" + + RET=0 + + topo_mtu_change 2000 + ping_do $h1 192.0.2.18 "-s 1800 -w 3" + check_err $? + log_test "ping $encap packet size 1800 after MTU change" +} diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 3f248d1f5b91..c1f16bb992dc 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -211,7 +211,7 @@ log_test() return 1 fi - printf "TEST: %-60s [PASS]\n" "$test_name $opt_str" + printf "TEST: %-60s [ OK ]\n" "$test_name $opt_str" return 0 } diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh new file mode 100755 index 000000000000..a0b5f57d6bd3 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh @@ -0,0 +1,567 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +---------------------------+ +------------------------------+ +# | vrf-h1 | | vrf-h2 | +# | + $h1 | | + $h2 | +# | | 10.1.1.101/24 | | | 10.1.2.101/24 | +# | | default via 10.1.1.1 | | | default via 10.1.2.1 | +# +----|----------------------+ +----|-------------------------+ +# | | +# +----|--------------------------------------------|-------------------------+ +# | SW | | | +# | +--|--------------------------------------------|-----------------------+ | +# | | + $swp1 br1 + $swp2 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx10 + vx20 | | +# | | local 10.0.0.1 local 10.0.0.1 | | +# | | remote 10.0.0.2 remote 10.0.0.2 | | +# | | id 1000 id 2000 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | | +# | | + vlan10 vlan20 + | | +# | | | 10.1.1.11/24 10.1.2.11/24 | | | +# | | | | | | +# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | | +# | | 10.1.1.1/24 10.1.2.1/24 | | +# | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | +# | | vrf-green | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 +lo | +# | | 192.0.2.1/24 10.0.0.1/32 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | vrf-spine | +# | + $rp2 | +# | 192.0.2.2/24 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | | +# | + v1 (veth) | +# | | 192.0.3.2/24 | +# +----|--------------------------------------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | + v2 (veth) +lo NS1 (netns) | +# | 192.0.3.1/24 10.0.0.2/32 | +# | | +# | +-----------------------------------------------------------------------+ | +# | | vrf-green | | +# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | | +# | | | 10.1.1.1/24 10.1.2.1/24 | | | +# | | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | | +# | | | | | | +# | | + vlan10 vlan20 + | | +# | | | 10.1.1.12/24 10.1.2.12/24 | | | +# | | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | | +# | | + vx10 + vx20 | | +# | | local 10.0.0.2 local 10.0.0.2 | | +# | | remote 10.0.0.1 remote 10.0.0.1 | | +# | | id 1000 id 2000 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + w1 (veth) + w3 (veth) | | +# | | | vid 10 pvid untagged br1 | vid 20 pvid untagged | | +# | +--|------------------------------------------|-------------------------+ | +# | | | | +# | | | | +# | +--|----------------------+ +--|-------------------------+ | +# | | | vrf-h1 | | | vrf-h2 | | +# | | + w2 (veth) | | + w4 (veth) | | +# | | 10.1.1.102/24 | | 10.1.2.102/24 | | +# | | default via 10.1.1.1 | | default via 10.1.2.1 | | +# | +-------------------------+ +----------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + arp_decap + arp_suppression +" +NUM_NETIFS=6 +source lib.sh + +require_command $ARPING + +hx_create() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + vrf_create $vrf_name + ip link set dev $if_name master $vrf_name + ip link set dev $vrf_name up + ip link set dev $if_name up + + ip address add $ip_addr/24 dev $if_name + ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \ + dev $if_name + ip route add default vrf $vrf_name nexthop via $gw_ip +} +export -f hx_create + +hx_destroy() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + ip route del default vrf $vrf_name nexthop via $gw_ip + ip neigh del $gw_ip dev $if_name + ip address del $ip_addr/24 dev $if_name + + ip link set dev $if_name down + vrf_destroy $vrf_name +} + +h1_create() +{ + hx_create "vrf-h1" $h1 10.1.1.101 10.1.1.1 +} + +h1_destroy() +{ + hx_destroy "vrf-h1" $h1 10.1.1.101 10.1.1.1 +} + +h2_create() +{ + hx_create "vrf-h2" $h2 10.1.2.101 10.1.2.1 +} + +h2_destroy() +{ + hx_destroy "vrf-h2" $h2 10.1.2.101 10.1.2.1 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + ip address add dev $rp1 192.0.2.1/24 + ip route add 10.0.0.2/32 nexthop via 192.0.2.2 + + ip link add name vx10 type vxlan id 1000 \ + local 10.0.0.1 remote 10.0.0.2 dstport 4789 \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 \ + local 10.0.0.1 remote 10.0.0.2 dstport 4789 \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 pvid untagged + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 20 dev $swp2 pvid untagged + + ip address add 10.0.0.1/32 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 10.1.1.11/24 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 10.1.1.1/24 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 10.1.2.11/24 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 10.1.2.1/24 dev vlan20-v + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 +} + +switch_destroy() +{ + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20 + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10 + + bridge vlan del vid 20 dev br1 self + bridge vlan del vid 10 dev br1 self + + ip link del dev vlan20 + + ip link del dev vlan10 + + vrf_destroy "vrf-green" + + ip address del 10.0.0.1/32 dev lo + + bridge vlan del vid 20 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + ip route del 10.0.0.2/32 nexthop via 192.0.2.2 + ip address del dev $rp1 192.0.2.1/24 + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +spine_create() +{ + vrf_create "vrf-spine" + ip link set dev $rp2 master vrf-spine + ip link set dev v1 master vrf-spine + ip link set dev vrf-spine up + ip link set dev $rp2 up + ip link set dev v1 up + + ip address add 192.0.2.2/24 dev $rp2 + ip address add 192.0.3.2/24 dev v1 + + ip route add 10.0.0.1/32 vrf vrf-spine nexthop via 192.0.2.1 + ip route add 10.0.0.2/32 vrf vrf-spine nexthop via 192.0.3.1 +} + +spine_destroy() +{ + ip route del 10.0.0.2/32 vrf vrf-spine nexthop via 192.0.3.1 + ip route del 10.0.0.1/32 vrf vrf-spine nexthop via 192.0.2.1 + + ip address del 192.0.3.2/24 dev v1 + ip address del 192.0.2.2/24 dev $rp2 + + ip link set dev v1 down + ip link set dev $rp2 down + vrf_destroy "vrf-spine" +} + +ns_h1_create() +{ + hx_create "vrf-h1" w2 10.1.1.102 10.1.1.1 +} +export -f ns_h1_create + +ns_h2_create() +{ + hx_create "vrf-h2" w4 10.1.2.102 10.1.2.1 +} +export -f ns_h2_create + +ns_switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + ip link set dev br1 up + + ip link set dev v2 up + ip address add dev v2 192.0.3.1/24 + ip route add 10.0.0.1/32 nexthop via 192.0.3.2 + + ip link add name vx10 type vxlan id 1000 \ + local 10.0.0.2 remote 10.0.0.1 dstport 4789 \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 \ + local 10.0.0.2 remote 10.0.0.1 dstport 4789 \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev w1 master br1 + ip link set dev w1 up + bridge vlan add vid 10 dev w1 pvid untagged + + ip link set dev w3 master br1 + ip link set dev w3 up + bridge vlan add vid 20 dev w3 pvid untagged + + ip address add 10.0.0.2/32 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 10.1.1.12/24 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 10.1.1.1/24 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 10.1.2.12/24 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 10.1.2.1/24 dev vlan20-v + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 +} +export -f ns_switch_create + +ns_init() +{ + ip link add name w1 type veth peer name w2 + ip link add name w3 type veth peer name w4 + + ip link set dev lo up + + ns_h1_create + ns_h2_create + ns_switch_create +} +export -f ns_init + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 ns_init +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +macs_populate() +{ + local mac1=$1; shift + local mac2=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local dst=$1; shift + + bridge fdb add $mac1 dev vx10 self master extern_learn static \ + dst $dst vlan 10 + bridge fdb add $mac2 dev vx20 self master extern_learn static \ + dst $dst vlan 20 + + ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \ + extern_learn + ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \ + extern_learn +} +export -f macs_populate + +macs_initialize() +{ + local h1_ns_mac=$(in_ns ns1 mac_get w2) + local h2_ns_mac=$(in_ns ns1 mac_get w4) + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + + macs_populate $h1_ns_mac $h2_ns_mac 10.1.1.102 10.1.2.102 10.0.0.2 + in_ns ns1 macs_populate $h1_mac $h2_mac 10.1.1.101 10.1.2.101 10.0.0.1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + spine_create + ns1_create + + macs_initialize +} + +cleanup() +{ + pre_cleanup + + ns1_destroy + spine_destroy + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 10.1.2.101 ": local->local vid 10->vid 20" + ping_test $h1 10.1.1.102 ": local->remote vid 10->vid 10" + ping_test $h2 10.1.2.102 ": local->remote vid 20->vid 20" + ping_test $h1 10.1.2.102 ": local->remote vid 10->vid 20" + ping_test $h2 10.1.1.102 ": local->remote vid 20->vid 10" +} + +arp_decap() +{ + # Repeat the ping tests, but without populating the neighbours. This + # makes sure we correctly decapsulate ARP packets + log_info "deleting neighbours from vlan interfaces" + + ip neigh del 10.1.1.102 dev vlan10 + ip neigh del 10.1.2.102 dev vlan20 + + ping_ipv4 + + ip neigh replace 10.1.1.102 lladdr $(in_ns ns1 mac_get w2) nud noarp \ + dev vlan10 extern_learn + ip neigh replace 10.1.2.102 lladdr $(in_ns ns1 mac_get w4) nud noarp \ + dev vlan20 extern_learn +} + +arp_suppression_compare() +{ + local expect=$1; shift + local actual=$(in_ns ns1 tc_rule_stats_get vx10 1 ingress) + + (( expect == actual )) + check_err $? "expected $expect arps got $actual" +} + +arp_suppression() +{ + ip link set dev vx10 type bridge_slave neigh_suppress on + + in_ns ns1 tc qdisc add dev vx10 clsact + in_ns ns1 tc filter add dev vx10 ingress proto arp pref 1 handle 101 \ + flower dst_mac ff:ff:ff:ff:ff:ff arp_tip 10.1.1.102 arp_op \ + request action pass + + # The neighbour is configured on the SVI and ARP suppression is on, so + # the ARP request should be suppressed + RET=0 + + $ARPING -I $h1 -fqb -c 1 -w 1 10.1.1.102 + check_err $? "arping failed" + + arp_suppression_compare 0 + + log_test "neigh_suppress: on / neigh exists: yes" + + # Delete the neighbour from the the SVI. A single ARP request should be + # received by the remote VTEP + RET=0 + + ip neigh del 10.1.1.102 dev vlan10 + + $ARPING -I $h1 -fqb -c 1 -w 1 10.1.1.102 + check_err $? "arping failed" + + arp_suppression_compare 1 + + log_test "neigh_suppress: on / neigh exists: no" + + # Turn off ARP suppression and make sure ARP is not suppressed, + # regardless of neighbour existence on the SVI + RET=0 + + ip neigh del 10.1.1.102 dev vlan10 &> /dev/null + ip link set dev vx10 type bridge_slave neigh_suppress off + + $ARPING -I $h1 -fqb -c 1 -w 1 10.1.1.102 + check_err $? "arping failed" + + arp_suppression_compare 2 + + log_test "neigh_suppress: off / neigh exists: no" + + RET=0 + + ip neigh add 10.1.1.102 lladdr $(in_ns ns1 mac_get w2) nud noarp \ + dev vlan10 extern_learn + + $ARPING -I $h1 -fqb -c 1 -w 1 10.1.1.102 + check_err $? "arping failed" + + arp_suppression_compare 3 + + log_test "neigh_suppress: off / neigh exists: yes" + + in_ns ns1 tc qdisc del dev vx10 clsact +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh new file mode 100755 index 000000000000..1209031bc794 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh @@ -0,0 +1,551 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +---------------------------+ +------------------------------+ +# | vrf-h1 | | vrf-h2 | +# | + $h1 | | + $h2 | +# | | 10.1.1.101/24 | | | 10.1.2.101/24 | +# | | default via 10.1.1.1 | | | default via 10.1.2.1 | +# +----|----------------------+ +----|-------------------------+ +# | | +# +----|--------------------------------------------|-------------------------+ +# | SW | | | +# | +--|--------------------------------------------|-----------------------+ | +# | | + $swp1 br1 + $swp2 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx10 + vx20 | | +# | | local 10.0.0.1 local 10.0.0.1 | | +# | | remote 10.0.0.2 remote 10.0.0.2 | | +# | | id 1010 id 1020 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx4001 | | +# | | local 10.0.0.1 | | +# | | remote 10.0.0.2 | | +# | | id 104001 | | +# | | dstport 4789 | | +# | | vid 4001 pvid untagged | | +# | | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | | | +# | | + vlan10 | vlan20 + | | +# | | | 10.1.1.11/24 | 10.1.2.11/24 | | | +# | | | | | | | +# | | + vlan10-v (macvlan) + vlan20-v (macvlan) + | | +# | | 10.1.1.1/24 vlan4001 10.1.2.1/24 | | +# | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | +# | | vrf-green | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 +lo | +# | | 192.0.2.1/24 10.0.0.1/32 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | vrf-spine | +# | + $rp2 | +# | 192.0.2.2/24 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | | +# | + v1 (veth) | +# | | 192.0.3.2/24 | +# +----|--------------------------------------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | + v2 (veth) +lo NS1 (netns) | +# | 192.0.3.1/24 10.0.0.2/32 | +# | | +# | +-----------------------------------------------------------------------+ | +# | | vrf-green | | +# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | | +# | | | 10.1.1.1/24 10.1.2.1/24 | | | +# | | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | | +# | | | vlan4001 | | | +# | | + vlan10 + vlan20 + | | +# | | | 10.1.1.12/24 | 10.1.2.12/24 | | | +# | | | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | | +# | | + vx10 + vx20 | | +# | | local 10.0.0.2 local 10.0.0.2 | | +# | | remote 10.0.0.1 remote 10.0.0.1 | | +# | | id 1010 id 1020 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx4001 | | +# | | local 10.0.0.2 | | +# | | remote 10.0.0.1 | | +# | | id 104001 | | +# | | dstport 4789 | | +# | | vid 4001 pvid untagged | | +# | | | | +# | | + w1 (veth) + w3 (veth) | | +# | | | vid 10 pvid untagged br1 | vid 20 pvid untagged | | +# | +--|------------------------------------------|-------------------------+ | +# | | | | +# | | | | +# | +--|----------------------+ +--|-------------------------+ | +# | | | vrf-h1 | | | vrf-h2 | | +# | | + w2 (veth) | | + w4 (veth) | | +# | | 10.1.1.102/24 | | 10.1.2.102/24 | | +# | | default via 10.1.1.1 | | default via 10.1.2.1 | | +# | +-------------------------+ +----------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 +" +NUM_NETIFS=6 +source lib.sh + +hx_create() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + vrf_create $vrf_name + ip link set dev $if_name master $vrf_name + ip link set dev $vrf_name up + ip link set dev $if_name up + + ip address add $ip_addr/24 dev $if_name + ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \ + dev $if_name + ip route add default vrf $vrf_name nexthop via $gw_ip +} +export -f hx_create + +hx_destroy() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + ip route del default vrf $vrf_name nexthop via $gw_ip + ip neigh del $gw_ip dev $if_name + ip address del $ip_addr/24 dev $if_name + + ip link set dev $if_name down + vrf_destroy $vrf_name +} + +h1_create() +{ + hx_create "vrf-h1" $h1 10.1.1.101 10.1.1.1 +} + +h1_destroy() +{ + hx_destroy "vrf-h1" $h1 10.1.1.101 10.1.1.1 +} + +h2_create() +{ + hx_create "vrf-h2" $h2 10.1.2.101 10.1.2.1 +} + +h2_destroy() +{ + hx_destroy "vrf-h2" $h2 10.1.2.101 10.1.2.1 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + ip address add dev $rp1 192.0.2.1/24 + ip route add 10.0.0.2/32 nexthop via 192.0.2.2 + + ip link add name vx10 type vxlan id 1010 \ + local 10.0.0.1 remote 10.0.0.2 dstport 4789 \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 1020 \ + local 10.0.0.1 remote 10.0.0.2 dstport 4789 \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 pvid untagged + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 20 dev $swp2 pvid untagged + + ip link add name vx4001 type vxlan id 104001 \ + local 10.0.0.1 dstport 4789 \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx4001 up + + ip link set dev vx4001 master br1 + bridge vlan add vid 4001 dev vx4001 pvid untagged + + ip address add 10.0.0.1/32 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 10.1.1.11/24 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 10.1.1.1/24 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 10.1.2.11/24 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 10.1.2.1/24 dev vlan20-v + + ip link add link br1 name vlan4001 up master vrf-green \ + type vlan id 4001 + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + bridge vlan add vid 4001 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 +} + +switch_destroy() +{ + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20 + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10 + + bridge vlan del vid 4001 dev br1 self + bridge vlan del vid 20 dev br1 self + bridge vlan del vid 10 dev br1 self + + ip link del dev vlan4001 + + ip link del dev vlan20 + + ip link del dev vlan10 + + vrf_destroy "vrf-green" + + ip address del 10.0.0.1/32 dev lo + + bridge vlan del vid 20 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 4001 dev vx4001 + ip link set dev vx4001 nomaster + + ip link set dev vx4001 down + ip link del dev vx4001 + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + ip route del 10.0.0.2/32 nexthop via 192.0.2.2 + ip address del dev $rp1 192.0.2.1/24 + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +spine_create() +{ + vrf_create "vrf-spine" + ip link set dev $rp2 master vrf-spine + ip link set dev v1 master vrf-spine + ip link set dev vrf-spine up + ip link set dev $rp2 up + ip link set dev v1 up + + ip address add 192.0.2.2/24 dev $rp2 + ip address add 192.0.3.2/24 dev v1 + + ip route add 10.0.0.1/32 vrf vrf-spine nexthop via 192.0.2.1 + ip route add 10.0.0.2/32 vrf vrf-spine nexthop via 192.0.3.1 +} + +spine_destroy() +{ + ip route del 10.0.0.2/32 vrf vrf-spine nexthop via 192.0.3.1 + ip route del 10.0.0.1/32 vrf vrf-spine nexthop via 192.0.2.1 + + ip address del 192.0.3.2/24 dev v1 + ip address del 192.0.2.2/24 dev $rp2 + + ip link set dev v1 down + ip link set dev $rp2 down + vrf_destroy "vrf-spine" +} + +ns_h1_create() +{ + hx_create "vrf-h1" w2 10.1.1.102 10.1.1.1 +} +export -f ns_h1_create + +ns_h2_create() +{ + hx_create "vrf-h2" w4 10.1.2.102 10.1.2.1 +} +export -f ns_h2_create + +ns_switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + ip link set dev br1 up + + ip link set dev v2 up + ip address add dev v2 192.0.3.1/24 + ip route add 10.0.0.1/32 nexthop via 192.0.3.2 + + ip link add name vx10 type vxlan id 1010 \ + local 10.0.0.2 remote 10.0.0.1 dstport 4789 \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 1020 \ + local 10.0.0.2 remote 10.0.0.1 dstport 4789 \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link add name vx4001 type vxlan id 104001 \ + local 10.0.0.2 dstport 4789 \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx4001 up + + ip link set dev vx4001 master br1 + bridge vlan add vid 4001 dev vx4001 pvid untagged + + ip link set dev w1 master br1 + ip link set dev w1 up + bridge vlan add vid 10 dev w1 pvid untagged + + ip link set dev w3 master br1 + ip link set dev w3 up + bridge vlan add vid 20 dev w3 pvid untagged + + ip address add 10.0.0.2/32 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 10.1.1.12/24 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 10.1.1.1/24 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 10.1.2.12/24 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 10.1.2.1/24 dev vlan20-v + + ip link add link br1 name vlan4001 up master vrf-green \ + type vlan id 4001 + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + bridge vlan add vid 4001 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 +} +export -f ns_switch_create + +ns_init() +{ + ip link add name w1 type veth peer name w2 + ip link add name w3 type veth peer name w4 + + ip link set dev lo up + + ns_h1_create + ns_h2_create + ns_switch_create +} +export -f ns_init + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 ns_init +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +__l2_vni_init() +{ + local mac1=$1; shift + local mac2=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local dst=$1; shift + + bridge fdb add $mac1 dev vx10 self master extern_learn static \ + dst $dst vlan 10 + bridge fdb add $mac2 dev vx20 self master extern_learn static \ + dst $dst vlan 20 + + ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \ + extern_learn + ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \ + extern_learn +} +export -f __l2_vni_init + +l2_vni_init() +{ + local h1_ns_mac=$(in_ns ns1 mac_get w2) + local h2_ns_mac=$(in_ns ns1 mac_get w4) + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + + __l2_vni_init $h1_ns_mac $h2_ns_mac 10.1.1.102 10.1.2.102 10.0.0.2 + in_ns ns1 __l2_vni_init $h1_mac $h2_mac 10.1.1.101 10.1.2.101 10.0.0.1 +} + +__l3_vni_init() +{ + local mac=$1; shift + local vtep_ip=$1; shift + local host1_ip=$1; shift + local host2_ip=$1; shift + + bridge fdb add $mac dev vx4001 self master extern_learn static \ + dst $vtep_ip vlan 4001 + + ip neigh add $vtep_ip lladdr $mac nud noarp dev vlan4001 extern_learn + + ip route add $host1_ip/32 vrf vrf-green nexthop via $vtep_ip \ + dev vlan4001 onlink + ip route add $host2_ip/32 vrf vrf-green nexthop via $vtep_ip \ + dev vlan4001 onlink +} +export -f __l3_vni_init + +l3_vni_init() +{ + local vlan4001_ns_mac=$(in_ns ns1 mac_get vlan4001) + local vlan4001_mac=$(mac_get vlan4001) + + __l3_vni_init $vlan4001_ns_mac 10.0.0.2 10.1.1.102 10.1.2.102 + in_ns ns1 __l3_vni_init $vlan4001_mac 10.0.0.1 10.1.1.101 10.1.2.101 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + spine_create + ns1_create + + l2_vni_init + l3_vni_init +} + +cleanup() +{ + pre_cleanup + + ns1_destroy + spine_destroy + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 10.1.2.101 ": local->local vid 10->vid 20" + ping_test $h1 10.1.1.102 ": local->remote vid 10->vid 10" + ping_test $h2 10.1.2.102 ": local->remote vid 20->vid 20" + ping_test $h1 10.1.2.102 ": local->remote vid 10->vid 20" + ping_test $h2 10.1.1.102 ": local->remote vid 20->vid 10" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c index 5d56cc0838f6..c0c9ecb891e1 100644 --- a/tools/testing/selftests/net/ip_defrag.c +++ b/tools/testing/selftests/net/ip_defrag.c @@ -20,6 +20,7 @@ static bool cfg_do_ipv4; static bool cfg_do_ipv6; static bool cfg_verbose; static bool cfg_overlap; +static bool cfg_permissive; static unsigned short cfg_port = 9000; const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) }; @@ -35,7 +36,7 @@ const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT; static int payload_len; static int max_frag_len; -#define MSG_LEN_MAX 60000 /* Max UDP payload length. */ +#define MSG_LEN_MAX 10000 /* Max UDP payload length. */ #define IP4_MF (1u << 13) /* IPv4 MF flag. */ #define IP6_MF (1) /* IPv6 MF flag. */ @@ -59,13 +60,14 @@ static void recv_validate_udp(int fd_udp) msg_counter++; if (cfg_overlap) { - if (ret != -1) - error(1, 0, "recv: expected timeout; got %d", - (int)ret); - if (errno != ETIMEDOUT && errno != EAGAIN) - error(1, errno, "recv: expected timeout: %d", - errno); - return; /* OK */ + if (ret == -1 && (errno == ETIMEDOUT || errno == EAGAIN)) + return; /* OK */ + if (!cfg_permissive) { + if (ret != -1) + error(1, 0, "recv: expected timeout; got %d", + (int)ret); + error(1, errno, "recv: expected timeout: %d", errno); + } } if (ret == -1) @@ -203,7 +205,6 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr, { struct ip *iphdr = (struct ip *)ip_frame; struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame; - const bool ipv4 = !ipv6; int res; int offset; int frag_len; @@ -251,7 +252,7 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr, } /* Occasionally test IPv4 "runs" (see net/ipv4/ip_fragment.c) */ - if (ipv4 && !cfg_overlap && (rand() % 100 < 20) && + if (!cfg_overlap && (rand() % 100 < 20) && (payload_len > 9 * max_frag_len)) { offset = 6 * max_frag_len; while (offset < (UDP_HLEN + payload_len)) { @@ -276,41 +277,38 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr, while (offset < (UDP_HLEN + payload_len)) { send_fragment(fd_raw, addr, alen, offset, ipv6); /* IPv4 ignores duplicates, so randomly send a duplicate. */ - if (ipv4 && (1 == rand() % 100)) + if (rand() % 100 == 1) send_fragment(fd_raw, addr, alen, offset, ipv6); offset += 2 * max_frag_len; } if (cfg_overlap) { - /* Send an extra random fragment. */ + /* Send an extra random fragment. + * + * Duplicates and some fragments completely inside + * previously sent fragments are dropped/ignored. So + * random offset and frag_len can result in a dropped + * fragment instead of a dropped queue/packet. Thus we + * hard-code offset and frag_len. + */ + if (max_frag_len * 4 < payload_len || max_frag_len < 16) { + /* not enough payload for random offset and frag_len. */ + offset = 8; + frag_len = UDP_HLEN + max_frag_len; + } else { + offset = rand() % (payload_len / 2); + frag_len = 2 * max_frag_len + 1 + rand() % 256; + } if (ipv6) { struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN); /* sendto() returns EINVAL if offset + frag_len is too small. */ - offset = rand() % (UDP_HLEN + payload_len - 1); - frag_len = max_frag_len + rand() % 256; /* In IPv6 if !!(frag_len % 8), the fragment is dropped. */ frag_len &= ~0x7; fraghdr->ip6f_offlg = htons(offset / 8 | IP6_MF); ip6hdr->ip6_plen = htons(frag_len); frag_len += IP6_HLEN; } else { - /* In IPv4, duplicates and some fragments completely inside - * previously sent fragments are dropped/ignored. So - * random offset and frag_len can result in a dropped - * fragment instead of a dropped queue/packet. So we - * hard-code offset and frag_len. - * - * See ade446403bfb ("net: ipv4: do not handle duplicate - * fragments as overlapping"). - */ - if (max_frag_len * 4 < payload_len || max_frag_len < 16) { - /* not enough payload to play with random offset and frag_len. */ - offset = 8; - frag_len = IP4_HLEN + UDP_HLEN + max_frag_len; - } else { - offset = rand() % (payload_len / 2); - frag_len = 2 * max_frag_len + 1 + rand() % 256; - } + frag_len += IP4_HLEN; iphdr->ip_off = htons(offset / 8 | IP4_MF); iphdr->ip_len = htons(frag_len); } @@ -327,7 +325,7 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr, while (offset < (UDP_HLEN + payload_len)) { send_fragment(fd_raw, addr, alen, offset, ipv6); /* IPv4 ignores duplicates, so randomly send a duplicate. */ - if (ipv4 && (1 == rand() % 100)) + if (rand() % 100 == 1) send_fragment(fd_raw, addr, alen, offset, ipv6); offset += 2 * max_frag_len; } @@ -342,7 +340,7 @@ static void run_test(struct sockaddr *addr, socklen_t alen, bool ipv6) */ struct timeval tv = { .tv_sec = 1, .tv_usec = 10 }; int idx; - int min_frag_len = ipv6 ? 1280 : 8; + int min_frag_len = 8; /* Initialize the payload. */ for (idx = 0; idx < MSG_LEN_MAX; ++idx) @@ -434,7 +432,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "46ov")) != -1) { + while ((c = getopt(argc, argv, "46opv")) != -1) { switch (c) { case '4': cfg_do_ipv4 = true; @@ -445,6 +443,9 @@ static void parse_opts(int argc, char **argv) case 'o': cfg_overlap = true; break; + case 'p': + cfg_permissive = true; + break; case 'v': cfg_verbose = true; break; diff --git a/tools/testing/selftests/net/ip_defrag.sh b/tools/testing/selftests/net/ip_defrag.sh index 7dd79a9efb17..15d3489ecd9c 100755 --- a/tools/testing/selftests/net/ip_defrag.sh +++ b/tools/testing/selftests/net/ip_defrag.sh @@ -20,6 +20,10 @@ setup() { ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_low_thresh=7000000 >/dev/null 2>&1 ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_time=1 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.netfilter.nf_conntrack_frag6_high_thresh=9000000 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.netfilter.nf_conntrack_frag6_low_thresh=7000000 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.netfilter.nf_conntrack_frag6_timeout=1 >/dev/null 2>&1 + # DST cache can get full with a lot of frags, with GC not keeping up with the test. ip netns exec "${NETNS}" sysctl -w net.ipv6.route.max_size=65536 >/dev/null 2>&1 } @@ -43,4 +47,16 @@ ip netns exec "${NETNS}" ./ip_defrag -6 echo "ipv6 defrag with overlaps" ip netns exec "${NETNS}" ./ip_defrag -6o +# insert an nf_conntrack rule so that the codepath in nf_conntrack_reasm.c taken +ip netns exec "${NETNS}" ip6tables -A INPUT -m conntrack --ctstate INVALID -j ACCEPT + +echo "ipv6 nf_conntrack defrag" +ip netns exec "${NETNS}" ./ip_defrag -6 + +echo "ipv6 nf_conntrack defrag with overlaps" +# netfilter will drop some invalid packets, so we run the test in +# permissive mode: i.e. pass the test if the packet is correctly assembled +# even if we sent an overlap +ip netns exec "${NETNS}" ./ip_defrag -6op + echo "all tests done" diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 8db35b99457c..71d7fdc513c1 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -28,6 +28,19 @@ KEY_AES=0x0123456789abcdef0123456789012345 SPI1=0x1 SPI2=0x2 +do_esp_policy() { + local ns=$1 + local me=$2 + local remote=$3 + local lnet=$4 + local rnet=$5 + + # to encrypt packets as they go out (includes forwarded packets that need encapsulation) + ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow + # to fwd decrypted packets after esp processing: + ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow +} + do_esp() { local ns=$1 local me=$2 @@ -40,10 +53,59 @@ do_esp() { ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet - # to encrypt packets as they go out (includes forwarded packets that need encapsulation) - ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow - # to fwd decrypted packets after esp processing: - ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow + do_esp_policy $ns $me $remote $lnet $rnet +} + +# add policies with different netmasks, to make sure kernel carries +# the policies contained within new netmask over when search tree is +# re-built. +# peer netns that are supposed to be encapsulated via esp have addresses +# in the 10.0.1.0/24 and 10.0.2.0/24 subnets, respectively. +# +# Adding a policy for '10.0.1.0/23' will make it necessary to +# alter the prefix of 10.0.1.0 subnet. +# In case new prefix overlaps with existing node, the node and all +# policies it carries need to be merged with the existing one(s). +# +# Do that here. +do_overlap() +{ + local ns=$1 + + # adds new nodes to tree (neither network exists yet in policy database). + ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block + + # adds a new node in the 10.0.0.0/24 tree (dst node exists). + ip -net $ns xfrm policy add src 10.2.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block + + # adds a 10.2.0.0/23 node, but for different dst. + ip -net $ns xfrm policy add src 10.2.0.0/23 dst 10.0.1.0/24 dir fwd priority 200 action block + + # dst now overlaps with the 10.0.1.0/24 ESP policy in fwd. + # kernel must 'promote' existing one (10.0.0.0/24) to 10.0.0.0/23. + # But 10.0.0.0/23 also includes existing 10.0.1.0/24, so that node + # also has to be merged too, including source-sorted subtrees. + # old: + # 10.0.0.0/24 (node 1 in dst tree of the bin) + # 10.1.0.0/24 (node in src tree of dst node 1) + # 10.2.0.0/24 (node in src tree of dst node 1) + # 10.0.1.0/24 (node 2 in dst tree of the bin) + # 10.0.2.0/24 (node in src tree of dst node 2) + # 10.2.0.0/24 (node in src tree of dst node 2) + # + # The next 'policy add' adds dst '10.0.0.0/23', which means + # that dst node 1 and dst node 2 have to be merged including + # the sub-tree. As no duplicates are allowed, policies in + # the two '10.0.2.0/24' are also merged. + # + # after the 'add', internal search tree should look like this: + # 10.0.0.0/23 (node in dst tree of bin) + # 10.0.2.0/24 (node in src tree of dst node) + # 10.1.0.0/24 (node in src tree of dst node) + # 10.2.0.0/24 (node in src tree of dst node) + # + # 10.0.0.0/24 and 10.0.1.0/24 nodes have been merged as 10.0.0.0/23. + ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/23 dir fwd priority 200 action block } do_esp_policy_get_check() { @@ -160,6 +222,41 @@ check_xfrm() { return $lret } +check_exceptions() +{ + logpostfix="$1" + local lret=0 + + # ping to .254 should be excluded from the tunnel (exception is in place). + check_xfrm 0 254 + if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .254 to fail ($logpostfix)" + lret=1 + else + echo "PASS: ping to .254 bypassed ipsec tunnel ($logpostfix)" + fi + + # ping to .253 should use use ipsec due to direct policy exception. + check_xfrm 1 253 + if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .253 to use ipsec tunnel ($logpostfix)" + lret=1 + else + echo "PASS: direct policy matches ($logpostfix)" + fi + + # ping to .2 should use ipsec. + check_xfrm 1 2 + if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .2 to use ipsec tunnel ($logpostfix)" + lret=1 + else + echo "PASS: policy matches ($logpostfix)" + fi + + return $lret +} + #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" @@ -270,32 +367,44 @@ do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28 do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 -# ping to .254 should now be excluded from the tunnel -check_xfrm 0 254 +check_exceptions "exceptions" if [ $? -ne 0 ]; then - echo "FAIL: expected ping to .254 to fail" ret=1 -else - echo "PASS: ping to .254 bypassed ipsec tunnel" fi -# ping to .253 should use use ipsec due to direct policy exception. -check_xfrm 1 253 +# insert block policies with adjacent/overlapping netmasks +do_overlap ns3 + +check_exceptions "exceptions and block policies" if [ $? -ne 0 ]; then - echo "FAIL: expected ping to .253 to use ipsec tunnel" ret=1 -else - echo "PASS: direct policy matches" fi -# ping to .2 should use ipsec. -check_xfrm 1 2 -if [ $? -ne 0 ]; then - echo "FAIL: expected ping to .2 to use ipsec tunnel" - ret=1 -else - echo "PASS: policy matches" -fi +for n in ns3 ns4;do + ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125 + sleep $((RANDOM%5)) +done + +check_exceptions "exceptions and block policies after hresh changes" + +# full flush of policy db, check everything gets freed incl. internal meta data +ip -net ns3 xfrm policy flush + +do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 +do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 + +# move inexact policies to hash table +ip -net ns3 xfrm policy set hthresh4 16 16 + +sleep $((RANDOM%5)) +check_exceptions "exceptions and block policies after hthresh change in ns3" + +# restore original hthresh settings -- move policies back to tables +for n in ns3 ns4;do + ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128 + sleep $((RANDOM%5)) +done +check_exceptions "exceptions and block policies after hresh change to normal" for i in 1 2 3 4;do ip netns del ns$i;done diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index 2e563d17cf0c..d1bbafb16f47 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -240,7 +240,7 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) cm->cmsg_type == IP_RECVERR) || (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) || - (cm->cmsg_level = SOL_PACKET && + (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)) { serr = (void *) CMSG_DATA(cm); if (serr->ee_errno != ENOMSG || diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c index e20b017e7073..b2065536d407 100644 --- a/tools/testing/selftests/rtc/rtctest.c +++ b/tools/testing/selftests/rtc/rtctest.c @@ -145,15 +145,12 @@ TEST_F(rtc, alarm_alm_set) { rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); ASSERT_NE(-1, rc); - EXPECT_NE(0, rc); + ASSERT_NE(0, rc); /* Disable alarm interrupts */ rc = ioctl(self->fd, RTC_AIE_OFF, 0); ASSERT_NE(-1, rc); - if (rc == 0) - return; - rc = read(self->fd, &data, sizeof(unsigned long)); ASSERT_NE(-1, rc); TH_LOG("data: %lx", data); @@ -202,7 +199,109 @@ TEST_F(rtc, alarm_wkalm_set) { rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); ASSERT_NE(-1, rc); - EXPECT_NE(0, rc); + ASSERT_NE(0, rc); + + rc = read(self->fd, &data, sizeof(unsigned long)); + ASSERT_NE(-1, rc); + + rc = ioctl(self->fd, RTC_RD_TIME, &tm); + ASSERT_NE(-1, rc); + + new = timegm((struct tm *)&tm); + ASSERT_EQ(new, secs); +} + +TEST_F(rtc, alarm_alm_set_minute) { + struct timeval tv = { .tv_sec = 62 }; + unsigned long data; + struct rtc_time tm; + fd_set readfds; + time_t secs, new; + int rc; + + rc = ioctl(self->fd, RTC_RD_TIME, &tm); + ASSERT_NE(-1, rc); + + secs = timegm((struct tm *)&tm) + 60 - tm.tm_sec; + gmtime_r(&secs, (struct tm *)&tm); + + rc = ioctl(self->fd, RTC_ALM_SET, &tm); + if (rc == -1) { + ASSERT_EQ(EINVAL, errno); + TH_LOG("skip alarms are not supported."); + return; + } + + rc = ioctl(self->fd, RTC_ALM_READ, &tm); + ASSERT_NE(-1, rc); + + TH_LOG("Alarm time now set to %02d:%02d:%02d.", + tm.tm_hour, tm.tm_min, tm.tm_sec); + + /* Enable alarm interrupts */ + rc = ioctl(self->fd, RTC_AIE_ON, 0); + ASSERT_NE(-1, rc); + + FD_ZERO(&readfds); + FD_SET(self->fd, &readfds); + + rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); + ASSERT_NE(-1, rc); + ASSERT_NE(0, rc); + + /* Disable alarm interrupts */ + rc = ioctl(self->fd, RTC_AIE_OFF, 0); + ASSERT_NE(-1, rc); + + rc = read(self->fd, &data, sizeof(unsigned long)); + ASSERT_NE(-1, rc); + TH_LOG("data: %lx", data); + + rc = ioctl(self->fd, RTC_RD_TIME, &tm); + ASSERT_NE(-1, rc); + + new = timegm((struct tm *)&tm); + ASSERT_EQ(new, secs); +} + +TEST_F(rtc, alarm_wkalm_set_minute) { + struct timeval tv = { .tv_sec = 62 }; + struct rtc_wkalrm alarm = { 0 }; + struct rtc_time tm; + unsigned long data; + fd_set readfds; + time_t secs, new; + int rc; + + rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); + ASSERT_NE(-1, rc); + + secs = timegm((struct tm *)&alarm.time) + 60 - alarm.time.tm_sec; + gmtime_r(&secs, (struct tm *)&alarm.time); + + alarm.enabled = 1; + + rc = ioctl(self->fd, RTC_WKALM_SET, &alarm); + if (rc == -1) { + ASSERT_EQ(EINVAL, errno); + TH_LOG("skip alarms are not supported."); + return; + } + + rc = ioctl(self->fd, RTC_WKALM_RD, &alarm); + ASSERT_NE(-1, rc); + + TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.", + alarm.time.tm_mday, alarm.time.tm_mon + 1, + alarm.time.tm_year + 1900, alarm.time.tm_hour, + alarm.time.tm_min, alarm.time.tm_sec); + + FD_ZERO(&readfds); + FD_SET(self->fd, &readfds); + + rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); + ASSERT_NE(-1, rc); + ASSERT_NE(0, rc); rc = read(self->fd, &data, sizeof(unsigned long)); ASSERT_NE(-1, rc); diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index fce7f4ce0692..1760b3e39730 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -9,7 +9,7 @@ BINARIES := seccomp_bpf seccomp_benchmark CFLAGS += -Wl,-no-as-needed -Wall seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h - $(CC) $(CFLAGS) $(LDFLAGS) -lpthread $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $< -lpthread -o $@ TEST_PROGS += $(BINARIES) EXTRA_CLEAN := $(BINARIES) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 067cb4607d6c..496a9a8c773a 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3044,7 +3044,7 @@ TEST(user_notification_basic) /* Check that the basic notification machinery works */ listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); - EXPECT_GE(listener, 0); + ASSERT_GE(listener, 0); /* Installing a second listener in the chain should EBUSY */ EXPECT_EQ(user_trap_syscall(__NR_getpid, @@ -3103,7 +3103,7 @@ TEST(user_notification_kill_in_middle) listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); - EXPECT_GE(listener, 0); + ASSERT_GE(listener, 0); /* * Check that nothing bad happens when we kill the task in the middle @@ -3152,7 +3152,7 @@ TEST(user_notification_signal) listener = user_trap_syscall(__NR_gettid, SECCOMP_FILTER_FLAG_NEW_LISTENER); - EXPECT_GE(listener, 0); + ASSERT_GE(listener, 0); pid = fork(); ASSERT_GE(pid, 0); @@ -3215,7 +3215,7 @@ TEST(user_notification_closed_listener) listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); - EXPECT_GE(listener, 0); + ASSERT_GE(listener, 0); /* * Check that we get an ENOSYS when the listener is closed. @@ -3376,7 +3376,7 @@ TEST(seccomp_get_notif_sizes) { struct seccomp_notif_sizes sizes; - EXPECT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); + ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); } diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json index 637ea0219617..0da3545cabdb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json @@ -17,7 +17,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 2", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -41,7 +41,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 10 pipe index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark.*index 2", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use mark.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -65,7 +65,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark continue index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*allow mark.*index 2", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*allow mark.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -89,7 +89,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 789 drop index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*use mark 789.*index 2", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*use mark 789.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -113,7 +113,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 656768 reclassify index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 656768.*index 2", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use mark 656768.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -137,7 +137,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 65 jump 1 index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0xED3E.*use mark 65.*index 2", + "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0[xX]ED3E.*use mark 65.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -161,7 +161,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295 reclassify index 90", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 90", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 4294967295.*index 90", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use mark 4294967295.*index 90", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -185,7 +185,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295999 pipe index 90", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 90", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark 4294967295999.*index 90", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use mark 4294967295999.*index 90", "matchCount": "0", "teardown": [] }, @@ -207,7 +207,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow prio pass index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow prio.*index 9", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow prio.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -231,7 +231,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 7 pipe index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 7.*index 9", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use prio 7.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -255,7 +255,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 3 continue index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use prio 3.*index 9", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use prio 3.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -279,7 +279,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow prio drop index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow prio.*index 9", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*allow prio.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -303,7 +303,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 998877 reclassify index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 998877.*index 9", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 998877.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -327,7 +327,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 998877 jump 10 index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0xED3E.*use prio 998877.*index 9", + "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0[xX]ED3E.*use prio 998877.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -351,7 +351,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 4294967295 reclassify index 99", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 99", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 4294967295.*index 99", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 4294967295.*index 99", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -375,7 +375,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 4294967298 pipe index 99", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 99", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 4294967298.*index 99", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use prio 4294967298.*index 99", "matchCount": "0", "teardown": [] }, @@ -397,7 +397,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow tcindex.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -421,7 +421,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 111 pipe index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 111.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use tcindex 111.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -445,7 +445,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use tcindex 1.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -469,7 +469,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use tcindex 1.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -493,7 +493,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex drop index 77", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 77", - "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow tcindex.*index 77", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*allow tcindex.*index 77", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -517,7 +517,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex reclassify index 77", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 77", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*allow tcindex.*index 77", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*allow tcindex.*index 77", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -541,7 +541,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex jump 999 index 77", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 77", - "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0xED3E.*allow tcindex.*index 77", + "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0[xX]ED3E.*allow tcindex.*index 77", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -565,7 +565,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 65535 pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*use tcindex 65535.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*use tcindex 65535.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -589,7 +589,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 65539 pipe index 1", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 65539.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use tcindex 65539.*index 1", "matchCount": "0", "teardown": [] }, @@ -611,7 +611,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark src 00:11:22:33:44:55 pipe index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow mark src 00:11:22:33:44:55.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow mark src 00:11:22:33:44:55.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -635,7 +635,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 9876 dst 00:11:22:33:44:55 reclassify index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -659,7 +659,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex src 00:aa:bb:cc:dd:ee dst 00:11:22:33:44:55 pass index 11", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 11", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -683,7 +683,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 7 type 0xfefe pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xFEFE.*use mark 7.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]FEFE.*use mark 7.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -707,7 +707,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 444 type 0xabba pipe index 21", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 21", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xABBA.*use prio 444.*index 21", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ABBA.*use prio 444.*index 21", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -731,7 +731,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 5000 type 0xabcd reclassify index 21", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 21", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xABCD.*use tcindex 5000.*index 21", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ABCD.*use tcindex 5000.*index 21", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -739,7 +739,7 @@ }, { "id": "fac3", - "name": "Create valid ife encode action with index at 32-bit maximnum", + "name": "Create valid ife encode action with index at 32-bit maximum", "category": [ "actions", "ife" @@ -755,7 +755,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 4294967295", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 4294967295", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -779,7 +779,7 @@ "cmdUnderTest": "$TC actions add action ife decode pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action pass.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action pass.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -803,7 +803,7 @@ "cmdUnderTest": "$TC actions add action ife decode pipe index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -827,7 +827,7 @@ "cmdUnderTest": "$TC actions add action ife decode continue index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action continue.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action continue.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -851,7 +851,7 @@ "cmdUnderTest": "$TC actions add action ife decode drop index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action drop.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action drop.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -875,7 +875,7 @@ "cmdUnderTest": "$TC actions add action ife decode reclassify index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -899,7 +899,7 @@ "cmdUnderTest": "$TC actions add action ife decode jump 10 index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -923,7 +923,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295999", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 4294967295999", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295999", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 4294967295999", "matchCount": "0", "teardown": [] }, @@ -945,7 +945,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark kuka index 4", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 4", - "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0xED3E.*allow mark.*index 4", + "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0[xX]ED3E.*allow mark.*index 4", "matchCount": "0", "teardown": [] }, @@ -967,7 +967,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow prio pipe index 4 cookie aabbccddeeff112233445566778800a1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 4", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -991,7 +991,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow foo pipe index 4", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 4", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow foo.*index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow foo.*index 4", "matchCount": "0", "teardown": [] }, @@ -1013,7 +1013,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow prio type 70000 pipe index 4", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 4", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0x11170.*allow prio.*index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]11170.*allow prio.*index 4", "matchCount": "0", "teardown": [] }, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json index 10b2d894e436..e7e15a7336b6 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json @@ -81,35 +81,6 @@ ] ] }, - { - "id": "ba4e", - "name": "Add tunnel_key set action with missing mandatory id parameter", - "category": [ - "actions", - "tunnel_key" - ], - "setup": [ - [ - "$TC actions flush action tunnel_key", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2", - "expExitCode": "255", - "verifyCmd": "$TC actions list action tunnel_key", - "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2", - "matchCount": "0", - "teardown": [ - [ - "$TC actions flush action tunnel_key", - 0, - 1, - 255 - ] - ] - }, { "id": "a5e0", "name": "Add tunnel_key set action with invalid src_ip parameter", @@ -634,7 +605,7 @@ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 index 4 cookie aa11bb22cc33dd44ee55ff66aa11b1b2", "expExitCode": "0", "verifyCmd": "$TC actions get action tunnel_key index 4", - "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*dst_port 0.*csum pipe.*index 4 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*csum pipe.*index 4 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2", "matchCount": "1", "teardown": [ "$TC actions flush action tunnel_key" diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index 880b96fc80d4..c0534e298b51 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -25,6 +25,7 @@ struct gup_benchmark { __u64 size; __u32 nr_pages_per_call; __u32 flags; + __u64 expansion[10]; /* For future use */ }; int main(int argc, char **argv) diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 50f7e9272481..bf1bb15b6fbe 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -1503,7 +1503,7 @@ exit: exit(20); } if (successes != total_nr_tests) { - eprintf("ERROR: succeded fewer than number of tries (%d != %d)\n", + eprintf("ERROR: succeeded fewer than number of tries (%d != %d)\n", successes, total_nr_tests); exit(21); } diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index 460b4bdf4c1e..5d546dcdbc80 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -1133,6 +1133,21 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) pkey_assert(err); } +void become_child(void) +{ + pid_t forkret; + + forkret = fork(); + pkey_assert(forkret >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); + + if (!forkret) { + /* in the child */ + return; + } + exit(0); +} + /* Assumes that all pkeys other than 'pkey' are unallocated */ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) { @@ -1141,7 +1156,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) int nr_allocated_pkeys = 0; int i; - for (i = 0; i < NR_PKEYS*2; i++) { + for (i = 0; i < NR_PKEYS*3; i++) { int new_pkey; dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); @@ -1152,20 +1167,26 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) if ((new_pkey == -1) && (errno == ENOSPC)) { dprintf2("%s() failed to allocate pkey after %d tries\n", __func__, nr_allocated_pkeys); - break; + } else { + /* + * Ensure the number of successes never + * exceeds the number of keys supported + * in the hardware. + */ + pkey_assert(nr_allocated_pkeys < NR_PKEYS); + allocated_pkeys[nr_allocated_pkeys++] = new_pkey; } - pkey_assert(nr_allocated_pkeys < NR_PKEYS); - allocated_pkeys[nr_allocated_pkeys++] = new_pkey; + + /* + * Make sure that allocation state is properly + * preserved across fork(). + */ + if (i == NR_PKEYS*2) + become_child(); } dprintf3("%s()::%d\n", __func__, __LINE__); - /* - * ensure it did not reach the end of the loop without - * failure: - */ - pkey_assert(i < NR_PKEYS*2); - /* * There are 16 pkeys supported in hardware. Three are * allocated by the time we get here: diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c index 00a26a82fa98..97311333700e 100644 --- a/tools/testing/selftests/x86/unwind_vdso.c +++ b/tools/testing/selftests/x86/unwind_vdso.c @@ -44,7 +44,6 @@ int main() #include #include #include -#include #include #include #include