From 50f9ca048c190b08f06a2b88e4d18d507b528b52 Mon Sep 17 00:00:00 2001 From: Yogesh Mohan Marimuthu Date: Fri, 2 Nov 2018 00:18:48 +0530 Subject: [PATCH 01/60] drm/amd/display: fix sporadic multiple aux transaction failure [why] When there are multiple aux transactions in parallel, sporadically the aux transactions start to fail continuously. The aux transactions were failing because the busy bit for the given gpio pin was always set. The busy bit was always set because the programming sequence to read, modify and write the busy bit was not atomic. Because of this, when multiple threads modify the busy bits for their gpio pins in the same integer variable, the busy bits integer variable is sometimes written with old data, causing failure. [how] Instead of using individual bits to track gpio pins and grouping them into integers, one byte is allocated for each gpio pin. Now, whenever a gpio pin needs to be marked as in use, writing a value of one to that byte is sufficient and the other bytes are not impacted. There is also no need for atomicity with bytes, unlike with bits. Signed-off-by: Yogesh Mohan Marimuthu Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/gpio/gpio_service.c | 65 +++++-------------- .../drm/amd/display/dc/gpio/gpio_service.h | 7 +- 2 files changed, 21 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c index f20161c5706d..dada04296025 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c @@ -56,7 +56,6 @@ struct gpio_service *dal_gpio_service_create( struct dc_context *ctx) { struct gpio_service *service; - uint32_t index_of_id; service = kzalloc(sizeof(struct gpio_service), GFP_KERNEL); @@ -78,44 +77,33 @@ struct gpio_service *dal_gpio_service_create( goto failure_1; } - /* allocate and initialize business storage */ + /* allocate and initialize busyness storage */ { - const uint32_t bits_per_uint = sizeof(uint32_t) << 3; - index_of_id = 0; service->ctx = ctx; do { uint32_t number_of_bits = service->factory.number_of_pins[index_of_id]; + uint32_t i = 0; - uint32_t number_of_uints = - (number_of_bits + bits_per_uint - 1) / - bits_per_uint; + if (number_of_bits) { + service->busyness[index_of_id] = + kcalloc(number_of_bits, sizeof(char), + GFP_KERNEL); - uint32_t *slot; - - if (number_of_bits) { - uint32_t index_of_uint = 0; - - slot = kcalloc(number_of_uints, - sizeof(uint32_t), - GFP_KERNEL); - - if (!slot) { + if (!service->busyness[index_of_id]) { BREAK_TO_DEBUGGER(); goto failure_2; } do { - slot[index_of_uint] = 0; - - ++index_of_uint; - } while (index_of_uint < number_of_uints); - } else - slot = NULL; - - service->busyness[index_of_id] = slot; + service->busyness[index_of_id][i] = 0; + ++i; + } while (i < number_of_bits); + } else { + service->busyness[index_of_id] = NULL; + } ++index_of_id; } while (index_of_id < GPIO_ID_COUNT); @@ -125,13 +113,8 @@ struct gpio_service *dal_gpio_service_create( failure_2: while (index_of_id) { - uint32_t *slot; - --index_of_id; - - slot = service->busyness[index_of_id]; - - kfree(slot); + kfree(service->busyness[index_of_id]); } failure_1: @@ -169,9 +152,7 @@ void dal_gpio_service_destroy( uint32_t index_of_id = 0; do { - uint32_t *slot = (*ptr)->busyness[index_of_id]; - - kfree(slot); + kfree((*ptr)->busyness[index_of_id]); ++index_of_id; } while (index_of_id < GPIO_ID_COUNT); @@ -192,11 +173,7 @@
static bool is_pin_busy( enum gpio_id id, uint32_t en) { - const uint32_t bits_per_uint = sizeof(uint32_t) << 3; - - const uint32_t *slot = service->busyness[id] + (en / bits_per_uint); - - return 0 != (*slot & (1 << (en % bits_per_uint))); + return service->busyness[id][en]; } static void set_pin_busy( @@ -204,10 +181,7 @@ static void set_pin_busy( enum gpio_id id, uint32_t en) { - const uint32_t bits_per_uint = sizeof(uint32_t) << 3; - - service->busyness[id][en / bits_per_uint] |= - (1 << (en % bits_per_uint)); + service->busyness[id][en] = true; } static void set_pin_free( @@ -215,10 +189,7 @@ static void set_pin_free( enum gpio_id id, uint32_t en) { - const uint32_t bits_per_uint = sizeof(uint32_t) << 3; - - service->busyness[id][en / bits_per_uint] &= - ~(1 << (en % bits_per_uint)); + service->busyness[id][en] = false; } enum gpio_result dal_gpio_service_open( diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h index c7f3081f59cc..1d501a43d13b 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h @@ -36,10 +36,9 @@ struct gpio_service { /* * @brief * Business storage. - * For each member of 'enum gpio_id', - * store array of bits (packed into uint32_t slots), - * index individual bit by 'en' value */ - uint32_t *busyness[GPIO_ID_COUNT]; + * one byte For each member of 'enum gpio_id' + */ + char *busyness[GPIO_ID_COUNT]; }; enum gpio_result dal_gpio_service_open( From c452de15cfe561b1af56dbf040c4006f43a6ec35 Mon Sep 17 00:00:00 2001 From: Steven Chiu Date: Mon, 5 Nov 2018 11:42:19 -0500 Subject: [PATCH 02/60] drm/amd/display: 3.2.07 Signed-off-by: Steven Chiu Reviewed-by: Shahin Khayyer Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index dea8bc39c688..70873d28f02a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.06" +#define DC_VER "3.2.07" #define MAX_SURFACES 3 #define MAX_STREAMS 6 From 2119aa17c963e59710617c70bb7b59e89c5ff447 Mon Sep 17 00:00:00 2001 From: David Francis Date: Tue, 9 Oct 2018 09:45:28 -0400 Subject: [PATCH 03/60] drm/amd/display: Start documentation of DC [Why] There are a lot of unintuitive parts of the dm-dc interface. It would help us if these were documented to provide a common understanding of what they are supposed to do [How] Most of this documentation is stubs, to be filled out more thoroughly by the experts Not every dm-accessible function and struct is mentioned. Simple functions like getters, setters, retain, release, create, destroy can be left unadorned. 
Signed-off-by: David Francis Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 72 ++++++++++++++++++- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 21 +++++- .../gpu/drm/amd/display/dc/core/dc_resource.c | 35 +++++++-- .../gpu/drm/amd/display/dc/core/dc_stream.c | 2 +- drivers/gpu/drm/amd/display/dc/dc_link.h | 6 +- .../gpu/drm/amd/display/dc/inc/core_types.h | 12 +++- 6 files changed, 135 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index dba6b57830c7..8edd0309255b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -62,6 +62,55 @@ const static char DC_BUILD_ID[] = "production-build"; +/** + * DOC: Overview + * + * DC is the OS-agnostic component of the amdgpu DC driver. + * + * DC maintains and validates a set of structs representing the state of the + * driver and writes that state to AMD hardware + * + * Main DC HW structs: + * + * struct dc - The central struct. One per driver. Created on driver load, + * destroyed on driver unload. + * + * struct dc_context - One per driver. + * Used as a backpointer by most other structs in dc. + * + * struct dc_link - One per connector (the physical DP, HDMI, miniDP, or eDP + * plugpoints). Created on driver load, destroyed on driver unload. + * + * struct dc_sink - One per display. Created on boot or hotplug. + * Destroyed on shutdown or hotunplug. A dc_link can have a local sink + * (the display directly attached). It may also have one or more remote + * sinks (in the Multi-Stream Transport case) + * + * struct resource_pool - One per driver. Represents the hw blocks not in the + * main pipeline. Not directly accessible by dm. + * + * Main dc state structs: + * + * These structs can be created and destroyed as needed. There is a full set of + * these structs in dc->current_state representing the currently programmed state. + * + * struct dc_state - The global DC state to track global state information, + * such as bandwidth values. + * + * struct dc_stream_state - Represents the hw configuration for the pipeline from + * a framebuffer to a display. Maps one-to-one with dc_sink. + * + * struct dc_plane_state - Represents a framebuffer. Each stream has at least one, + * and may have more in the Multi-Plane Overlay case. + * + * struct resource_context - Represents the programmable state of everything in + * the resource_pool. Not directly accessible by dm. + * + * struct pipe_ctx - A member of struct resource_context. Represents the + * internal hardware pipeline components. Each dc_plane_state has either + * one or two (in the pipe-split case). + */ + /******************************************************************************* * Private functions ******************************************************************************/ @@ -240,7 +289,7 @@ bool dc_stream_get_crtc_position(struct dc *dc, } /** - * dc_stream_configure_crc: Configure CRC capture for the given stream. + * dc_stream_configure_crc() - Configure CRC capture for the given stream. * @dc: DC Object * @stream: The stream to configure CRC on. * @enable: Enable CRC if true, disable otherwise. @@ -292,7 +341,7 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, } /** - * dc_stream_get_crc: Get CRC values for the given stream. + * dc_stream_get_crc() - Get CRC values for the given stream. 
* @dc: DC object * @stream: The DC stream state of the stream to get CRCs from. * @r_cr, g_y, b_cb: CRC values for the three channels are stored here. @@ -1329,6 +1378,11 @@ static enum surface_update_type check_update_surfaces_for_stream( return overall_type; } +/** + * dc_check_update_surfaces_for_stream() - Determine update type (fast, med, or full) + * + * See :c:type:`enum surface_update_type ` for explanation of update types + */ enum surface_update_type dc_check_update_surfaces_for_stream( struct dc *dc, struct dc_surface_update *updates, @@ -1631,6 +1685,9 @@ enum dc_irq_source dc_interrupt_to_irq_source( return dal_irq_service_to_irq_source(dc->res_pool->irqs, src_id, ext_id); } +/** + * dc_interrupt_set() - Enable/disable an AMD hw interrupt source + */ bool dc_interrupt_set(struct dc *dc, enum dc_irq_source src, bool enable) { @@ -1724,6 +1781,11 @@ static bool link_add_remote_sink_helper(struct dc_link *dc_link, struct dc_sink return true; } +/** + * dc_link_add_remote_sink() - Create a sink and attach it to an existing link + * + * EDID length is in bytes + */ struct dc_sink *dc_link_add_remote_sink( struct dc_link *link, const uint8_t *edid, @@ -1782,6 +1844,12 @@ fail_add_sink: return NULL; } +/** + * dc_link_remove_remote_sink() - Remove a remote sink from a dc_link + * + * Note that this just removes the struct dc_sink - it doesn't + * program hardware or alter other members of dc_link + */ void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink) { int i; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 948596a02392..4dc5846de5c4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -198,6 +198,13 @@ static bool program_hpd_filter( return result; } +/** + * dc_link_detect_sink() - Determine if there is a sink connected + * + * @type: Returned connection type + * Does not detect downstream devices, such as MST sinks + * or display connected through active dongles + */ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type) { uint32_t is_hpd_high = 0; @@ -324,9 +331,9 @@ static enum signal_type get_basic_signal_type( return SIGNAL_TYPE_NONE; } -/* - * @brief - * Check whether there is a dongle on DP connector +/** + * dc_link_is_dp_sink_present() - Check if there is a native DP + * or passive DP-HDMI dongle connected */ bool dc_link_is_dp_sink_present(struct dc_link *link) { @@ -593,6 +600,14 @@ static bool is_same_edid(struct dc_edid *old_edid, struct dc_edid *new_edid) return (memcmp(old_edid->raw_edid, new_edid->raw_edid, new_edid->length) == 0); } +/** + * dc_link_detect() - Detect if a sink is attached to a given link + * + * link->local_sink is created or destroyed as needed. + * + * This does not create remote sinks but will trigger DM + * to start MST detection if a branch is detected. + */ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) { struct dc_sink_init_data sink_init_data = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 0bb844a7b990..d4fd1d1357fe 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1447,6 +1447,14 @@ static bool are_stream_backends_same( return true; } +/** + * dc_is_stream_unchanged() - Compare two stream states for equivalence. 
+ * + * Checks if there a difference between the two states + * that would require a mode change. + * + * Does not compare cursor position or attributes. + */ bool dc_is_stream_unchanged( struct dc_stream_state *old_stream, struct dc_stream_state *stream) { @@ -1457,6 +1465,9 @@ bool dc_is_stream_unchanged( return true; } +/** + * dc_is_stream_scaling_unchanged() - Compare scaling rectangles of two streams. + */ bool dc_is_stream_scaling_unchanged( struct dc_stream_state *old_stream, struct dc_stream_state *stream) { @@ -1616,6 +1627,9 @@ bool resource_is_stream_unchanged( return false; } +/** + * dc_add_stream_to_ctx() - Add a new dc_stream_state to a dc_state. + */ enum dc_status dc_add_stream_to_ctx( struct dc *dc, struct dc_state *new_ctx, @@ -1640,6 +1654,9 @@ enum dc_status dc_add_stream_to_ctx( return res; } +/** + * dc_remove_stream_from_ctx() - Remove a stream from a dc_state. + */ enum dc_status dc_remove_stream_from_ctx( struct dc *dc, struct dc_state *new_ctx, @@ -1860,6 +1877,12 @@ enum dc_status resource_map_pool_resources( return DC_ERROR_UNEXPECTED; } +/** + * dc_resource_state_copy_construct_current() - Creates a new dc_state from existing state + * Is a shallow copy. Increments refcounts on existing streams and planes. + * @dc: copy out of dc->current_state + * @dst_ctx: copy into this + */ void dc_resource_state_copy_construct_current( const struct dc *dc, struct dc_state *dst_ctx) @@ -1875,6 +1898,14 @@ void dc_resource_state_construct( dst_ctx->dccg = dc->res_pool->clk_mgr; } +/** + * dc_validate_global_state() - Determine if HW can support a given state + * Checks HW resource availability and bandwidth requirement. + * @dc: dc struct for this driver + * @new_ctx: state to be validated + * + * Return: DC_OK if the result can be programmed. Otherwise, an error code. + */ enum dc_status dc_validate_global_state( struct dc *dc, struct dc_state *new_ctx) @@ -2364,10 +2395,6 @@ void dc_resource_state_destruct(struct dc_state *context) } } -/* - * Copy src_ctx into dst_ctx and retain all surfaces and streams referenced - * by the src_ctx - */ void dc_resource_state_copy_construct( const struct dc_state *src_ctx, struct dc_state *dst_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 780838a05f44..66e5c4623a49 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -170,7 +170,7 @@ struct dc_stream_status *dc_stream_get_status( } /** - * Update the cursor attributes and set cursor surface address + * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address */ bool dc_stream_set_cursor_attributes( struct dc_stream_state *stream, diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 8738f27a8708..29f19d57ff7a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -128,8 +128,10 @@ struct dc_link { const struct dc_link_status *dc_link_get_status(const struct dc_link *dc_link); -/* - * Return an enumerated dc_link. dc_link order is constant and determined at +/** + * dc_get_link_at_index() - Return an enumerated dc_link. + * + * dc_link order is constant and determined at * boot time. They cannot be created or destroyed. * Use dc_get_caps() to get number of links. 
*/ diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index e3ee96afa60e..b168a5e9dd9d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -272,6 +272,17 @@ union bw_context { struct dce_bw_output dce; }; +/** + * struct dc_state - The full description of a state requested by a user + * + * @streams: Stream properties + * @stream_status: The planes on a given stream + * @res_ctx: Persistent state of resources + * @bw: The output from bandwidth and watermark calculations + * @pp_display_cfg: PowerPlay clocks and settings + * @dcn_bw_vars: non-stack memory to support bandwidth calculations + * + */ struct dc_state { struct dc_stream_state *streams[MAX_PIPES]; struct dc_stream_status stream_status[MAX_PIPES]; @@ -279,7 +290,6 @@ struct dc_state { struct resource_context res_ctx; - /* The output from BW and WM calculations. */ union bw_context bw; /* Note: these are big structures, do *not* put on stack! */ From 02e056e870a19a598e27868dd5ff78e7af4efae4 Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Fri, 2 Nov 2018 18:00:54 -0400 Subject: [PATCH 04/60] drm/amd/display: Remove unused panel patch "disconnect_delay" [Why] This patch is for use by dm, no need for it in dc. Signed-off-by: Joshua Aberback Reviewed-by: Jun Lei Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 6e12d640d020..91911ef8d746 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -191,7 +191,6 @@ union display_content_support { }; struct dc_panel_patch { - unsigned int disconnect_delay; unsigned int dppowerup_delay; unsigned int extra_t12_ms; }; From 71f7f3e01bbc5543a498b709fbf221fca3ae58b6 Mon Sep 17 00:00:00 2001 From: Krunoslav Kovac Date: Wed, 24 Oct 2018 15:33:50 -0400 Subject: [PATCH 05/60] drm/amd/display: Fix spelling of axis in modules/color/color_gamma.c Use axis instead of axix Signed-off-by: Krunoslav Kovac Reviewed-by: Aric Cyr Acked-by: Anthony Koo Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../drm/amd/display/modules/color/color_gamma.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index bbecbaefb741..479b77c2e89e 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1761,7 +1761,7 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, struct pwl_float_data *rgb_user = NULL; struct pwl_float_data_ex *curve = NULL; - struct gamma_pixel *axix_x = NULL; + struct gamma_pixel *axis_x = NULL; struct pixel_gamma_point *coeff = NULL; enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; bool ret = false; @@ -1787,10 +1787,10 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, GFP_KERNEL); if (!curve) goto curve_alloc_fail; - axix_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axix_x), + axis_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axis_x), GFP_KERNEL); - if (!axix_x) - goto axix_x_alloc_fail; + if (!axis_x) + goto axis_x_alloc_fail; coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff), GFP_KERNEL); if (!coeff) @@ -1803,7 +1803,7 @@ bool 
mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, tf = input_tf->tf; build_evenly_distributed_points( - axix_x, + axis_x, ramp->num_entries, dividers); @@ -1828,7 +1828,7 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, tf_pts->x_point_at_y1_blue = 1; map_regamma_hw_to_x_user(ramp, coeff, rgb_user, - coordinates_x, axix_x, curve, + coordinates_x, axis_x, curve, MAX_HW_POINTS, tf_pts, mapUserRamp && ramp->type != GAMMA_CUSTOM); if (ramp->type == GAMMA_CUSTOM) @@ -1838,8 +1838,8 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, kvfree(coeff); coeff_alloc_fail: - kvfree(axix_x); -axix_x_alloc_fail: + kvfree(axis_x); +axis_x_alloc_fail: kvfree(curve); curve_alloc_fail: kvfree(rgb_user); From 242b0c8ffa5ea5c089b00a605747a1458bcb9c30 Mon Sep 17 00:00:00 2001 From: abdoulaye berthe Date: Fri, 2 Nov 2018 12:07:46 -0400 Subject: [PATCH 06/60] drm/amd/display: CTS 4.2.2.7 [Why] Failure to read Detailed Capabilities Info. [How] Read Detailed Capabilities Info 80h-8Fh. Signed-off-by: abdoulaye berthe Reviewed-by: Wenjing Liu Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 4d1f8ac069c1..849a3a3032f7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2196,7 +2196,7 @@ static void get_active_converter_info( } if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_11) { - uint8_t det_caps[4]; + uint8_t det_caps[16]; /* CTS 4.2.2.7 expects source to read Detailed Capabilities Info : 00080h-0008F.*/ union dwnstream_port_caps_byte0 *port_caps = (union dwnstream_port_caps_byte0 *)det_caps; core_link_read_dpcd(link, DP_DOWNSTREAM_PORT_0, From ecd0136bfdb5a28b8a869c305823df9d663e85ee Mon Sep 17 00:00:00 2001 From: Harmanprit Tatla Date: Mon, 5 Nov 2018 17:55:53 -0500 Subject: [PATCH 07/60] drm/amd/display: Info frame cleanup * Use provided infopacket in stream (if valid) instead of reconstructing in set_vendor_info_packet() * Use proper format for enums * Use dc info packet struct instead Signed-off-by: Harmanprit Tatla Reviewed-by: Anthony Koo Acked-by: Krunoslav Kovac Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +- .../gpu/drm/amd/display/dc/core/dc_resource.c | 110 +----------------- drivers/gpu/drm/amd/display/dc/dc_stream.h | 2 + .../amd/display/modules/freesync/freesync.c | 10 +- .../amd/display/modules/inc/mod_info_packet.h | 14 +-- .../drm/amd/display/modules/inc/mod_shared.h | 27 +++-- .../display/modules/info_packet/info_packet.c | 15 +-- 7 files changed, 42 insertions(+), 142 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 32e791d9b9a8..bc3cf47a9a88 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -72,6 +72,7 @@ #include "modules/inc/mod_freesync.h" #include "modules/power/power_helpers.h" +#include "modules/inc/mod_info_packet.h" #define FIRMWARE_RAVEN_DMCU "amdgpu/raven_dmcu.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN_DMCU); @@ -2930,6 +2931,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (dm_state && dm_state->freesync_capable) stream->ignore_msa_timing_param = true; + finish: if (sink && sink->sink_signal ==
SIGNAL_TYPE_VIRTUAL && aconnector->base.force != DRM_FORCE_ON) dc_sink_release(sink); @@ -4423,8 +4425,8 @@ static void update_freesync_state_on_stream( dm->freesync_module, new_stream, &vrr, - packet_type_vrr, - transfer_func_unknown, + PACKET_TYPE_VRR, + TRANSFER_FUNC_UNKNOWN, &vrr_infopacket); new_crtc_state->freesync_timing_changed = diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d4fd1d1357fe..c347afd1030f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2233,113 +2233,15 @@ static void set_vendor_info_packet( struct dc_info_packet *info_packet, struct dc_stream_state *stream) { - uint32_t length = 0; - bool hdmi_vic_mode = false; - uint8_t checksum = 0; - uint32_t i = 0; - enum dc_timing_3d_format format; - // Can be different depending on packet content /*todo*/ - // unsigned int length = pPathMode->dolbyVision ? 24 : 5; + /* SPD info packet for FreeSync */ - info_packet->valid = false; - - format = stream->timing.timing_3d_format; - if (stream->view_format == VIEW_3D_FORMAT_NONE) - format = TIMING_3D_FORMAT_NONE; - - /* Can be different depending on packet content */ - length = 5; - - if (stream->timing.hdmi_vic != 0 - && stream->timing.h_total >= 3840 - && stream->timing.v_total >= 2160) - hdmi_vic_mode = true; - - /* According to HDMI 1.4a CTS, VSIF should be sent - * for both 3D stereo and HDMI VIC modes. - * For all other modes, there is no VSIF sent. */ - - if (format == TIMING_3D_FORMAT_NONE && !hdmi_vic_mode) + /* Check if Freesync is supported. Return if false. If true, + * set the corresponding bit in the info packet + */ + if (!stream->vsp_infopacket.valid) return; - /* 24bit IEEE Registration identifier (0x000c03). LSB first. */ - info_packet->sb[1] = 0x03; - info_packet->sb[2] = 0x0C; - info_packet->sb[3] = 0x00; - - /*PB4: 5 lower bytes = 0 (reserved). 3 higher bits = HDMI_Video_Format. - * The value for HDMI_Video_Format are: - * 0x0 (0b000) - No additional HDMI video format is presented in this - * packet - * 0x1 (0b001) - Extended resolution format present. 1 byte of HDMI_VIC - * parameter follows - * 0x2 (0b010) - 3D format indication present. 3D_Structure and - * potentially 3D_Ext_Data follows - * 0x3..0x7 (0b011..0b111) - reserved for future use */ - if (format != TIMING_3D_FORMAT_NONE) - info_packet->sb[4] = (2 << 5); - else if (hdmi_vic_mode) - info_packet->sb[4] = (1 << 5); - - /* PB5: If PB4 claims 3D timing (HDMI_Video_Format = 0x2): - * 4 lower bites = 0 (reserved). 4 higher bits = 3D_Structure. 
- * The value for 3D_Structure are: - * 0x0 - Frame Packing - * 0x1 - Field Alternative - * 0x2 - Line Alternative - * 0x3 - Side-by-Side (full) - * 0x4 - L + depth - * 0x5 - L + depth + graphics + graphics-depth - * 0x6 - Top-and-Bottom - * 0x7 - Reserved for future use - * 0x8 - Side-by-Side (Half) - * 0x9..0xE - Reserved for future use - * 0xF - Not used */ - switch (format) { - case TIMING_3D_FORMAT_HW_FRAME_PACKING: - case TIMING_3D_FORMAT_SW_FRAME_PACKING: - info_packet->sb[5] = (0x0 << 4); - break; - - case TIMING_3D_FORMAT_SIDE_BY_SIDE: - case TIMING_3D_FORMAT_SBS_SW_PACKED: - info_packet->sb[5] = (0x8 << 4); - length = 6; - break; - - case TIMING_3D_FORMAT_TOP_AND_BOTTOM: - case TIMING_3D_FORMAT_TB_SW_PACKED: - info_packet->sb[5] = (0x6 << 4); - break; - - default: - break; - } - - /*PB5: If PB4 is set to 0x1 (extended resolution format) - * fill PB5 with the correct HDMI VIC code */ - if (hdmi_vic_mode) - info_packet->sb[5] = stream->timing.hdmi_vic; - - /* Header */ - info_packet->hb0 = HDMI_INFOFRAME_TYPE_VENDOR; /* VSIF packet type. */ - info_packet->hb1 = 0x01; /* Version */ - - /* 4 lower bits = Length, 4 higher bits = 0 (reserved) */ - info_packet->hb2 = (uint8_t) (length); - - /* Calculate checksum */ - checksum = 0; - checksum += info_packet->hb0; - checksum += info_packet->hb1; - checksum += info_packet->hb2; - - for (i = 1; i <= length; i++) - checksum += info_packet->sb[i]; - - info_packet->sb[0] = (uint8_t) (0x100 - checksum); - - info_packet->valid = true; + *info_packet = stream->vsp_infopacket; } static void set_spd_info_packet( diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 771d9f17e26e..0c42418b0b3d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -56,6 +56,7 @@ struct dc_stream_state { struct dc_crtc_timing_adjust adjust; struct dc_info_packet vrr_infopacket; struct dc_info_packet vsc_infopacket; + struct dc_info_packet vsp_infopacket; struct rect src; /* composition area */ struct rect dst; /* stream addressable area */ @@ -129,6 +130,7 @@ struct dc_stream_update { struct dc_crtc_timing_adjust *adjust; struct dc_info_packet *vrr_infopacket; struct dc_info_packet *vsc_infopacket; + struct dc_info_packet *vsp_infopacket; bool *dpms_off; diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 620a171620ee..1544ed3f1747 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -608,12 +608,12 @@ static void build_vrr_infopacket_data(const struct mod_vrr_params *vrr, static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf, struct dc_info_packet *infopacket) { - if (app_tf != transfer_func_unknown) { + if (app_tf != TRANSFER_FUNC_UNKNOWN) { infopacket->valid = true; infopacket->sb[6] |= 0x08; // PB6 = [Bit 3 = Native Color Active] - if (app_tf == transfer_func_gamma_22) { + if (app_tf == TRANSFER_FUNC_GAMMA_22) { infopacket->sb[9] |= 0x04; // PB6 = [Bit 2 = Gamma 2.2 EOTF Active] } } @@ -688,11 +688,11 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, return; switch (packet_type) { - case packet_type_fs2: + case PACKET_TYPE_FS2: build_vrr_infopacket_v2(stream->signal, vrr, app_tf, infopacket); break; - case packet_type_vrr: - case packet_type_fs1: + case PACKET_TYPE_VRR: + case PACKET_TYPE_FS1: default: build_vrr_infopacket_v1(stream->signal, vrr, 
infopacket); } diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h index 786b34380f85..5b1c9a4c7643 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h @@ -26,15 +26,13 @@ #ifndef MOD_INFO_PACKET_H_ #define MOD_INFO_PACKET_H_ -struct info_packet_inputs { - const struct dc_stream_state *pStream; -}; +#include "mod_shared.h" -struct info_packets { - struct dc_info_packet *pVscInfoPacket; -}; +//Forward Declarations +struct dc_stream_state; +struct dc_info_packet; -void mod_build_infopackets(struct info_packet_inputs *inputs, - struct info_packets *info_packets); +void mod_build_vsc_infopacket(const struct dc_stream_state *stream, + struct dc_info_packet *info_packet); #endif diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h index 238c431ae483..1bd02c0ac30c 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h @@ -23,27 +23,26 @@ * */ - #ifndef MOD_SHARED_H_ #define MOD_SHARED_H_ enum color_transfer_func { - transfer_func_unknown, - transfer_func_srgb, - transfer_func_bt709, - transfer_func_pq2084, - transfer_func_pq2084_interim, - transfer_func_linear_0_1, - transfer_func_linear_0_125, - transfer_func_dolbyvision, - transfer_func_gamma_22, - transfer_func_gamma_26 + TRANSFER_FUNC_UNKNOWN, + TRANSFER_FUNC_SRGB, + TRANSFER_FUNC_BT709, + TRANSFER_FUNC_PQ2084, + TRANSFER_FUNC_PQ2084_INTERIM, + TRANSFER_FUNC_LINEAR_0_1, + TRANSFER_FUNC_LINEAR_0_125, + TRANSFER_FUNC_GAMMA_22, + TRANSFER_FUNC_GAMMA_26 }; enum vrr_packet_type { - packet_type_vrr, - packet_type_fs1, - packet_type_fs2 + PACKET_TYPE_VRR, + PACKET_TYPE_FS1, + PACKET_TYPE_FS2 }; + #endif /* MOD_SHARED_H_ */ diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index ff8bfb9b43b0..db06fab2ad5c 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -25,6 +25,10 @@ #include "mod_info_packet.h" #include "core_types.h" +#include "dc_types.h" +#include "mod_shared.h" + +#define HDMI_INFOFRAME_TYPE_VENDOR 0x81 enum ColorimetryRGBDP { ColorimetryRGB_DP_sRGB = 0, @@ -41,7 +45,7 @@ enum ColorimetryYCCDP { ColorimetryYCC_DP_ITU2020YCbCr = 7, }; -static void mod_build_vsc_infopacket(const struct dc_stream_state *stream, +void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet) { unsigned int vscPacketRevision = 0; @@ -159,7 +163,7 @@ static void mod_build_vsc_infopacket(const struct dc_stream_state *stream, * DPCD register is exposed in the new Extended Receiver Capability field for DPCD Rev. 1.4 * (and higher). When MISC1. bit 6. is Set to 1, a Source device uses a VSC SDP to indicate * the Pixel Encoding/Colorimetry Format and that a Sink device must ignore MISC1, bit 7, and - * MISC0, bits 7:1 (MISC1, bit 7. and MISC0, bits 7:1 become “don’t care”).) + * MISC0, bits 7:1 (MISC1, bit 7. and MISC0, bits 7:1 become "don't care").) 
*/ if (vscPacketRevision == 0x5) { /* Secondary-data Packet ID = 0 */ @@ -320,10 +324,3 @@ static void mod_build_vsc_infopacket(const struct dc_stream_state *stream, } -void mod_build_infopackets(struct info_packet_inputs *inputs, - struct info_packets *info_packets) -{ - if (info_packets->pVscInfoPacket != NULL) - mod_build_vsc_infopacket(inputs->pStream, info_packets->pVscInfoPacket); -} - From 65d38262b3e82fc795464e48ee88cb66fd4c85aa Mon Sep 17 00:00:00 2001 From: hersen wu Date: Tue, 30 Oct 2018 16:39:15 -0400 Subject: [PATCH 08/60] drm/amd/display: fbc state could not reach while enable fbc [WHY] fbc is within the data path from memory to dce. while re-configure mc dmif, fbc should be enabled. otherwise, fbc may not be enabled properly. [HOW] before re-configure mc dmif, disable fbc, only after dmif re-configuration fully done, enable fbc again. Signed-off-by: hersen wu Reviewed-by: Roman Li Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../amd/display/dc/dce110/dce110_compressor.c | 91 +++++++------------ .../display/dc/dce110/dce110_hw_sequencer.c | 57 +++++++----- .../gpu/drm/amd/display/dc/inc/compressor.h | 1 + 3 files changed, 66 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c index 1f7f25013217..52d50e24a995 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c @@ -64,65 +64,37 @@ static const struct dce110_compressor_reg_offsets reg_offsets[] = { static const uint32_t dce11_one_lpt_channel_max_resolution = 2560 * 1600; -enum fbc_idle_force { - /* Bit 0 - Display registers updated */ - FBC_IDLE_FORCE_DISPLAY_REGISTER_UPDATE = 0x00000001, - - /* Bit 2 - FBC_GRPH_COMP_EN register updated */ - FBC_IDLE_FORCE_GRPH_COMP_EN = 0x00000002, - /* Bit 3 - FBC_SRC_SEL register updated */ - FBC_IDLE_FORCE_SRC_SEL_CHANGE = 0x00000004, - /* Bit 4 - FBC_MIN_COMPRESSION register updated */ - FBC_IDLE_FORCE_MIN_COMPRESSION_CHANGE = 0x00000008, - /* Bit 5 - FBC_ALPHA_COMP_EN register updated */ - FBC_IDLE_FORCE_ALPHA_COMP_EN = 0x00000010, - /* Bit 6 - FBC_ZERO_ALPHA_CHUNK_SKIP_EN register updated */ - FBC_IDLE_FORCE_ZERO_ALPHA_CHUNK_SKIP_EN = 0x00000020, - /* Bit 7 - FBC_FORCE_COPY_TO_COMP_BUF register updated */ - FBC_IDLE_FORCE_FORCE_COPY_TO_COMP_BUF = 0x00000040, - - /* Bit 24 - Memory write to region 0 defined by MC registers. */ - FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION0 = 0x01000000, - /* Bit 25 - Memory write to region 1 defined by MC registers */ - FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION1 = 0x02000000, - /* Bit 26 - Memory write to region 2 defined by MC registers */ - FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION2 = 0x04000000, - /* Bit 27 - Memory write to region 3 defined by MC registers. 
*/ - FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION3 = 0x08000000, - - /* Bit 28 - Memory write from any client other than MCIF */ - FBC_IDLE_FORCE_MEMORY_WRITE_OTHER_THAN_MCIF = 0x10000000, - /* Bit 29 - CG statics screen signal is inactive */ - FBC_IDLE_FORCE_CG_STATIC_SCREEN_IS_INACTIVE = 0x20000000, -}; - - static uint32_t align_to_chunks_number_per_line(uint32_t pixels) { return 256 * ((pixels + 255) / 256); } -static void reset_lb_on_vblank(struct dc_context *ctx) +static void reset_lb_on_vblank(struct compressor *compressor, uint32_t crtc_inst) { - uint32_t value, frame_count; + uint32_t value; + uint32_t frame_count; + uint32_t status_pos; uint32_t retry = 0; - uint32_t status_pos = - dm_read_reg(ctx, mmCRTC_STATUS_POSITION); + struct dce110_compressor *cp110 = TO_DCE110_COMPRESSOR(compressor); + + cp110->offsets = reg_offsets[crtc_inst]; + + status_pos = dm_read_reg(compressor->ctx, DCP_REG(mmCRTC_STATUS_POSITION)); /* Only if CRTC is enabled and counter is moving we wait for one frame. */ - if (status_pos != dm_read_reg(ctx, mmCRTC_STATUS_POSITION)) { + if (status_pos != dm_read_reg(compressor->ctx, DCP_REG(mmCRTC_STATUS_POSITION))) { /* Resetting LB on VBlank */ - value = dm_read_reg(ctx, mmLB_SYNC_RESET_SEL); + value = dm_read_reg(compressor->ctx, DCP_REG(mmLB_SYNC_RESET_SEL)); set_reg_field_value(value, 3, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL); set_reg_field_value(value, 1, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL2); - dm_write_reg(ctx, mmLB_SYNC_RESET_SEL, value); + dm_write_reg(compressor->ctx, DCP_REG(mmLB_SYNC_RESET_SEL), value); - frame_count = dm_read_reg(ctx, mmCRTC_STATUS_FRAME_COUNT); + frame_count = dm_read_reg(compressor->ctx, DCP_REG(mmCRTC_STATUS_FRAME_COUNT)); for (retry = 10000; retry > 0; retry--) { - if (frame_count != dm_read_reg(ctx, mmCRTC_STATUS_FRAME_COUNT)) + if (frame_count != dm_read_reg(compressor->ctx, DCP_REG(mmCRTC_STATUS_FRAME_COUNT))) break; udelay(10); } @@ -130,13 +102,11 @@ static void reset_lb_on_vblank(struct dc_context *ctx) dm_error("Frame count did not increase for 100ms.\n"); /* Resetting LB on VBlank */ - value = dm_read_reg(ctx, mmLB_SYNC_RESET_SEL); + value = dm_read_reg(compressor->ctx, DCP_REG(mmLB_SYNC_RESET_SEL)); set_reg_field_value(value, 2, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL); set_reg_field_value(value, 0, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL2); - dm_write_reg(ctx, mmLB_SYNC_RESET_SEL, value); - + dm_write_reg(compressor->ctx, DCP_REG(mmLB_SYNC_RESET_SEL), value); } - } static void wait_for_fbc_state_changed( @@ -226,10 +196,10 @@ void dce110_compressor_enable_fbc( uint32_t addr; uint32_t value, misc_value; - addr = mmFBC_CNTL; value = dm_read_reg(compressor->ctx, addr); set_reg_field_value(value, 1, FBC_CNTL, FBC_GRPH_COMP_EN); + /* params->inst is valid HW CRTC instance start from 0 */ set_reg_field_value( value, params->inst, @@ -238,8 +208,10 @@ void dce110_compressor_enable_fbc( /* Keep track of enum controller_id FBC is attached to */ compressor->is_enabled = true; - compressor->attached_inst = params->inst; - cp110->offsets = reg_offsets[params->inst]; + /* attached_inst is SW CRTC instance start from 1 + * 0 = CONTROLLER_ID_UNDEFINED means not attached crtc + */ + compressor->attached_inst = params->inst + CONTROLLER_ID_D0; /* Toggle it as there is bug in HW */ set_reg_field_value(value, 0, FBC_CNTL, FBC_GRPH_COMP_EN); @@ -268,9 +240,10 @@ void dce110_compressor_enable_fbc( void dce110_compressor_disable_fbc(struct compressor *compressor) { struct dce110_compressor *cp110 = TO_DCE110_COMPRESSOR(compressor); + uint32_t crtc_inst = 
0; if (compressor->options.bits.FBC_SUPPORT) { - if (dce110_compressor_is_fbc_enabled_in_hw(compressor, NULL)) { + if (dce110_compressor_is_fbc_enabled_in_hw(compressor, &crtc_inst)) { uint32_t reg_data; /* Turn off compression */ reg_data = dm_read_reg(compressor->ctx, mmFBC_CNTL); @@ -284,8 +257,10 @@ void dce110_compressor_disable_fbc(struct compressor *compressor) wait_for_fbc_state_changed(cp110, false); } - /* Sync line buffer - dce100/110 only*/ - reset_lb_on_vblank(compressor->ctx); + /* Sync line buffer which fbc was attached to dce100/110 only */ + if (crtc_inst > CONTROLLER_ID_UNDEFINED && crtc_inst < CONTROLLER_ID_D3) + reset_lb_on_vblank(compressor, + crtc_inst - CONTROLLER_ID_D0); } } @@ -328,6 +303,8 @@ void dce110_compressor_program_compressed_surface_address_and_pitch( uint32_t compressed_surf_address_low_part = compressor->compr_surface_address.addr.low_part; + cp110->offsets = reg_offsets[params->inst]; + /* Clear content first. */ dm_write_reg( compressor->ctx, @@ -410,13 +387,7 @@ void dce110_compressor_set_fbc_invalidation_triggers( value = dm_read_reg(compressor->ctx, addr); set_reg_field_value( value, - fbc_trigger | - FBC_IDLE_FORCE_GRPH_COMP_EN | - FBC_IDLE_FORCE_SRC_SEL_CHANGE | - FBC_IDLE_FORCE_MIN_COMPRESSION_CHANGE | - FBC_IDLE_FORCE_ALPHA_COMP_EN | - FBC_IDLE_FORCE_ZERO_ALPHA_CHUNK_SKIP_EN | - FBC_IDLE_FORCE_FORCE_COPY_TO_COMP_BUF, + fbc_trigger, FBC_IDLE_FORCE_CLEAR_MASK, FBC_IDLE_FORCE_CLEAR_MASK); dm_write_reg(compressor->ctx, addr, value); @@ -549,7 +520,7 @@ void dce110_compressor_construct(struct dce110_compressor *compressor, compressor->base.channel_interleave_size = 0; compressor->base.dram_channels_num = 0; compressor->base.lpt_channels_num = 0; - compressor->base.attached_inst = 0; + compressor->base.attached_inst = CONTROLLER_ID_UNDEFINED; compressor->base.is_enabled = false; compressor->base.funcs = &dce110_compressor_funcs; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 2f062bacd78a..6349ba7bec7c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1766,12 +1766,13 @@ static void set_static_screen_control(struct pipe_ctx **pipe_ctx, * Check if FBC can be enabled */ static bool should_enable_fbc(struct dc *dc, - struct dc_state *context, - uint32_t *pipe_idx) + struct dc_state *context, + uint32_t *pipe_idx) { uint32_t i; struct pipe_ctx *pipe_ctx = NULL; struct resource_context *res_ctx = &context->res_ctx; + unsigned int underlay_idx = dc->res_pool->underlay_pipe_index; ASSERT(dc->fbc_compressor); @@ -1786,14 +1787,28 @@ static bool should_enable_fbc(struct dc *dc, for (i = 0; i < dc->res_pool->pipe_count; i++) { if (res_ctx->pipe_ctx[i].stream) { + pipe_ctx = &res_ctx->pipe_ctx[i]; - *pipe_idx = i; - break; + + if (!pipe_ctx) + continue; + + /* fbc not applicable on underlay pipe */ + if (pipe_ctx->pipe_idx != underlay_idx) { + *pipe_idx = i; + break; + } } } - /* Pipe context should be found */ - ASSERT(pipe_ctx); + if (i == dc->res_pool->pipe_count) + return false; + + if (!pipe_ctx->stream->sink) + return false; + + if (!pipe_ctx->stream->sink->link) + return false; /* Only supports eDP */ if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP) @@ -1817,8 +1832,9 @@ static bool should_enable_fbc(struct dc *dc, /* * Enable FBC */ -static void enable_fbc(struct dc *dc, - struct dc_state *context) +static void enable_fbc( + struct dc *dc, + struct 
dc_state *context) { uint32_t pipe_idx = 0; @@ -1828,10 +1844,9 @@ static void enable_fbc(struct dc *dc, struct compressor *compr = dc->fbc_compressor; struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx]; - params.source_view_width = pipe_ctx->stream->timing.h_addressable; params.source_view_height = pipe_ctx->stream->timing.v_addressable; - + params.inst = pipe_ctx->stream_res.tg->inst; compr->compr_surface_address.quad_part = dc->ctx->fbc_gpu_addr; compr->funcs->surface_address_and_pitch(compr, ¶ms); @@ -2046,10 +2061,10 @@ enum dc_status dce110_apply_ctx_to_hw( return status; } - dcb->funcs->set_scratch_critical_state(dcb, false); - if (dc->fbc_compressor) - enable_fbc(dc, context); + enable_fbc(dc, dc->current_state); + + dcb->funcs->set_scratch_critical_state(dcb, false); return DC_OK; } @@ -2408,7 +2423,6 @@ static void dce110_program_front_end_for_pipe( struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct xfm_grph_csc_adjustment adjust; struct out_csc_color_matrix tbl_entry; - unsigned int underlay_idx = dc->res_pool->underlay_pipe_index; unsigned int i; DC_LOGGER_INIT(); memset(&tbl_entry, 0, sizeof(tbl_entry)); @@ -2449,15 +2463,6 @@ static void dce110_program_front_end_for_pipe( program_scaler(dc, pipe_ctx); - /* fbc not applicable on Underlay pipe */ - if (dc->fbc_compressor && old_pipe->stream && - pipe_ctx->pipe_idx != underlay_idx) { - if (plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL) - dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor); - else - enable_fbc(dc, dc->current_state); - } - mi->funcs->mem_input_program_surface_config( mi, plane_state->format, @@ -2534,6 +2539,9 @@ static void dce110_apply_ctx_for_surface( if (num_planes == 0) return; + if (dc->fbc_compressor) + dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor); + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; @@ -2576,6 +2584,9 @@ static void dce110_apply_ctx_for_surface( (pipe_ctx->plane_state || old_pipe_ctx->plane_state)) dc->hwss.pipe_control_lock(dc, pipe_ctx, false); } + + if (dc->fbc_compressor) + enable_fbc(dc, dc->current_state); } static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/inc/compressor.h b/drivers/gpu/drm/amd/display/dc/inc/compressor.h index bcb18f5e1e60..7a147a9762a0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/compressor.h +++ b/drivers/gpu/drm/amd/display/dc/inc/compressor.h @@ -77,6 +77,7 @@ struct compressor_funcs { }; struct compressor { struct dc_context *ctx; + /* CONTROLLER_ID_D0 + instance, CONTROLLER_ID_UNDEFINED = 0 */ uint32_t attached_inst; bool is_enabled; const struct compressor_funcs *funcs; From 1cc9f371fa3e37ec45ac60c2e334cb2373913dad Mon Sep 17 00:00:00 2001 From: Nevenko Stupar Date: Thu, 8 Nov 2018 19:20:11 -0500 Subject: [PATCH 09/60] drm/amd/display: Re-arrange GFX9 fields For more clear usage in future Signed-off-by: Nevenko Stupar Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 7825e4b5e97c..9ddfe4c6938b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -358,15 +358,16 @@ union dc_tiling_info { } gfx8; 
struct { + enum swizzle_mode_values swizzle; unsigned int num_pipes; - unsigned int num_banks; + unsigned int max_compressed_frags; unsigned int pipe_interleave; + + unsigned int num_banks; unsigned int num_shader_engines; unsigned int num_rb_per_se; - unsigned int max_compressed_frags; bool shaderEnable; - enum swizzle_mode_values swizzle; bool meta_linear; bool rb_aligned; bool pipe_aligned; From 6de89f79ea7ad4804e3608863ffe3a155917b8b9 Mon Sep 17 00:00:00 2001 From: Chiawen Huang Date: Fri, 9 Nov 2018 11:44:38 +0800 Subject: [PATCH 10/60] drm/amd/display: Add customizable tracing event [why] add customizable log with a message input, which is for adding test log in debugging as printf function in ETW. [Usage] EVENT_LOG_CUST_MSG1("TestLog","Hello World %d=0x%x", 123, pDC); Signed-off-by: Chiawen Huang Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dm_event_log.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dm_event_log.h b/drivers/gpu/drm/amd/display/dc/dm_event_log.h index 34a701ca879e..65663f4d93e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_event_log.h +++ b/drivers/gpu/drm/amd/display/dc/dm_event_log.h @@ -33,6 +33,7 @@ #define EVENT_LOG_AUX_REQ(ddc, type, action, address, len, data) #define EVENT_LOG_AUX_REP(ddc, type, replyStatus, len, data) +#define EVENT_LOG_CUST_MSG(tag, a, ...) #endif From ed20dc0d8ca8285458f57e160ee3f99c6a9d8c48 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 8 Nov 2018 16:19:22 -0500 Subject: [PATCH 11/60] drm/amd/display: Copy crc_enabled when duplicating dm_crtc_state [Why] When running igt@kms_plane@pixel-format-pipe-* tests the CRC read will time out and the test will fail. This is because the CRTC is duplicated but the crc_enabled parameter isn't copied over to the new dm_crtc_state. CRC reads will time out because amdgpu_dm_crtc_handle_crc_irq will no longer call drm_crtc_add_crc_entry. [How] Copy crc_enabled when duplicating the state. Signed-off-by: Nicholas Kazlauskas Reviewed-by: David Francis Reviewed-by: Sun peng Li Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index bc3cf47a9a88..55e9b1249dd4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3003,6 +3003,7 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc) state->abm_level = cur->abm_level; state->vrr_supported = cur->vrr_supported; state->freesync_config = cur->freesync_config; + state->crc_enabled = cur->crc_enabled; /* TODO Duplicate dc_stream after objects are stream object is flattened */ From 8ccb596fc574bae614aea9a532d8993e0f400f7e Mon Sep 17 00:00:00 2001 From: SivapiriyanKumarasamy Date: Wed, 7 Nov 2018 14:59:41 -0500 Subject: [PATCH 12/60] drm/amd/display: Program dithering if requested Dithering needs to be enabled or disabled as requested. If dc_stream_update->dither_option is non-null, program the FMT blocks. 
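For illustration, a hypothetical caller-side sketch of the interface this change enables follows (not code from this series; the helper name is invented, and DITHER_OPTION_DISABLE is assumed to be an existing enum dc_dither_option value):

/* Hypothetical sketch: ask DC to change dithering on a stream by pointing
 * the new dc_stream_update::dither_option field at a dither option value.
 * A non-NULL pointer makes the commit path reprogram the OPP FMT block.
 */
static enum dc_dither_option example_dither = DITHER_OPTION_DISABLE;

static void example_request_dither_off(struct dc_stream_update *update)
{
	update->dither_option = &example_dither;
	/* the update is then submitted through the normal dc stream-update path */
}

Passing the option by pointer keeps the update optional: a NULL pointer means "no change", matching the other pointer fields in dc_stream_update.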
Signed-off-by: SivapiriyanKumarasamy Reviewed-by: Anthony Koo Reviewed-by: Krunoslav Kovac Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 8 ++++++++ drivers/gpu/drm/amd/display/dc/dc_stream.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 8edd0309255b..8a182cb35c8f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1463,6 +1463,14 @@ static void commit_planes_do_stream_update(struct dc *dc, if (stream_update->output_csc_transform) dc_stream_program_csc_matrix(dc, stream); + if (stream_update->dither_option) { + resource_build_bit_depth_reduction_params(pipe_ctx->stream, + &pipe_ctx->stream->bit_depth_params); + pipe_ctx->stream_res.opp->funcs->opp_program_fmt(pipe_ctx->stream_res.opp, + &stream->bit_depth_params, + &stream->clamping); + } + /* Full fe update*/ if (update_type == UPDATE_TYPE_FAST) continue; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 0c42418b0b3d..be34d638e15d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -136,6 +136,7 @@ struct dc_stream_update { struct colorspace_transform *gamut_remap; enum dc_color_space *output_color_space; + enum dc_dither_option *dither_option; struct dc_csc_transform *output_csc_transform; From fa3547dd92deff8624a18621035e876f94f24a1b Mon Sep 17 00:00:00 2001 From: David Francis Date: Fri, 9 Nov 2018 11:50:18 -0500 Subject: [PATCH 13/60] drm/amd/display: Allow clock lower on dce100 dce100 was set to always pass safe_to_lower = false to the clock manager Thus, on suspend the clocks were not being set to 0 which is incorrect behaviour This was causing s3 resume to blackscreen on intel CPUs with dce100 GPUs attached (Note that the hash in this Fixes: tag is the hash on Alex's tree) Fixes: ae7d8aeb38d7 ("drm/amd/display: remove safe_to_lower flag from dc, use 2 functions instead") Signed-off-by: David Francis Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../amd/display/dc/dce100/dce100_hw_sequencer.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c index bc50a8e25f4f..87771676acac 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c @@ -117,6 +117,18 @@ void dce100_prepare_bandwidth( false); } +void dce100_optimize_bandwidth( + struct dc *dc, + struct dc_state *context) +{ + dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool); + + dc->res_pool->clk_mgr->funcs->update_clocks( + dc->res_pool->clk_mgr, + context, + true); +} + /**************************************************************************/ void dce100_hw_sequencer_construct(struct dc *dc) @@ -125,6 +137,6 @@ void dce100_hw_sequencer_construct(struct dc *dc) dc->hwss.enable_display_power_gating = dce100_enable_display_power_gating; dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth; - dc->hwss.optimize_bandwidth = dce100_prepare_bandwidth; + dc->hwss.optimize_bandwidth = dce100_optimize_bandwidth; } From e96938a09dce68356654186f4ac0a31837e1da6f Mon Sep 17 00:00:00 2001 From: Steven Chiu Date: Mon, 12 Nov 2018 13:22:36 -0500 Subject: [PATCH 14/60] drm/amd/display: 3.2.08 Signed-off-by: Steven Chiu 
Reviewed-by: Fatemeh Darbehani Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 70873d28f02a..4b5bbb13ce7f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -39,7 +39,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.07" +#define DC_VER "3.2.08" #define MAX_SURFACES 3 #define MAX_STREAMS 6 From e2101675225fa45cf6994916c2051c5167ded3e2 Mon Sep 17 00:00:00 2001 From: Fatemeh Darbehani Date: Tue, 30 Oct 2018 11:32:40 -0400 Subject: [PATCH 15/60] drm/amd/display: Clean up for DCN1 clock debug logging [Why] To prepare for clock debug logging. With the exception of removing max_supported_dppclk_khz from logs, there are no functional changes. [How] Add clk_bypass struct and clean up buffer logic Signed-off-by: Fatemeh Darbehani Reviewed-by: Yongqiang Sun Acked-by: Su Chung Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_clk_mgr.c | 4 +- .../drm/amd/display/dc/dcn10/dcn10_clk_mgr.h | 6 +++ .../dc/dcn10/dcn10_hw_sequencer_debug.c | 39 +++++++++++-------- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 4 -- drivers/gpu/drm/amd/display/dc/dm_pp_smu.h | 2 +- 5 files changed, 32 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c index f9d7d2c26cc2..54abedbf1b43 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c @@ -328,12 +328,10 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr, *smu_req_cur = smu_req; } - static const struct clk_mgr_funcs dcn1_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = dcn1_update_clocks }; - struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx) { struct dc_debug_options *debug = &ctx->dc->debug; @@ -373,3 +371,5 @@ struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx) return &clk_mgr_dce->base; } + + diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h index 9dbaf6578006..a995eda443a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h @@ -28,6 +28,12 @@ #include "../dce/dce_clk_mgr.h" +struct clk_bypass { + uint32_t dcfclk_bypass; + uint32_t dispclk_pypass; + uint32_t dprefclk_bypass; +}; + void dcn1_pplib_apply_display_requirements( struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c index 211bb240a720..cd469014baa3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c @@ -44,6 +44,7 @@ #include "dcn10_hubp.h" #include "dcn10_hubbub.h" #include "dcn10_cm_common.h" +#include "dcn10_clk_mgr.h" static unsigned int snprintf_count(char *pBuf, unsigned int bufSize, char *fmt, ...) 
{ @@ -463,19 +464,22 @@ static unsigned int dcn10_get_otg_states(struct dc *dc, char *pBuf, unsigned int static unsigned int dcn10_get_clock_states(struct dc *dc, char *pBuf, unsigned int bufSize) { unsigned int chars_printed = 0; + unsigned int remaining_buffer = bufSize; - chars_printed = snprintf_count(pBuf, bufSize, "dcfclk_khz,dcfclk_deep_sleep_khz,dispclk_khz," - "dppclk_khz,max_supported_dppclk_khz,fclk_khz,socclk_khz\n" - "%d,%d,%d,%d,%d,%d,%d\n", + chars_printed = snprintf_count(pBuf, bufSize, "dcfclk,dcfclk_deep_sleep,dispclk," + "dppclk,fclk,socclk\n" + "%d,%d,%d,%d,%d,%d\n", dc->current_state->bw.dcn.clk.dcfclk_khz, dc->current_state->bw.dcn.clk.dcfclk_deep_sleep_khz, dc->current_state->bw.dcn.clk.dispclk_khz, dc->current_state->bw.dcn.clk.dppclk_khz, - dc->current_state->bw.dcn.clk.max_supported_dppclk_khz, dc->current_state->bw.dcn.clk.fclk_khz, dc->current_state->bw.dcn.clk.socclk_khz); - return chars_printed; + remaining_buffer -= chars_printed; + pBuf += chars_printed; + + return bufSize - remaining_buffer; } static void dcn10_clear_otpc_underflow(struct dc *dc) @@ -538,16 +542,16 @@ void dcn10_get_hw_state(struct dc *dc, char *pBuf, unsigned int bufSize, unsigne * Bit 0 - 15: Hardware block mask * Bit 15: 1 = Invariant Only, 0 = All */ - const unsigned int DC_HW_STATE_MASK_HUBBUB = 0x1; - const unsigned int DC_HW_STATE_MASK_HUBP = 0x2; - const unsigned int DC_HW_STATE_MASK_RQ = 0x4; - const unsigned int DC_HW_STATE_MASK_DLG = 0x8; - const unsigned int DC_HW_STATE_MASK_TTU = 0x10; - const unsigned int DC_HW_STATE_MASK_CM = 0x20; - const unsigned int DC_HW_STATE_MASK_MPCC = 0x40; - const unsigned int DC_HW_STATE_MASK_OTG = 0x80; - const unsigned int DC_HW_STATE_MASK_CLOCKS = 0x100; - const unsigned int DC_HW_STATE_INVAR_ONLY = 0x8000; + const unsigned int DC_HW_STATE_MASK_HUBBUB = 0x1; + const unsigned int DC_HW_STATE_MASK_HUBP = 0x2; + const unsigned int DC_HW_STATE_MASK_RQ = 0x4; + const unsigned int DC_HW_STATE_MASK_DLG = 0x8; + const unsigned int DC_HW_STATE_MASK_TTU = 0x10; + const unsigned int DC_HW_STATE_MASK_CM = 0x20; + const unsigned int DC_HW_STATE_MASK_MPCC = 0x40; + const unsigned int DC_HW_STATE_MASK_OTG = 0x80; + const unsigned int DC_HW_STATE_MASK_CLOCKS = 0x100; + const unsigned int DC_HW_STATE_INVAR_ONLY = 0x8000; unsigned int chars_printed = 0; unsigned int remaining_buf_size = bufSize; @@ -603,6 +607,9 @@ void dcn10_get_hw_state(struct dc *dc, char *pBuf, unsigned int bufSize, unsigne remaining_buf_size -= chars_printed; } - if ((mask & DC_HW_STATE_MASK_CLOCKS) && remaining_buf_size > 0) + if ((mask & DC_HW_STATE_MASK_CLOCKS) && remaining_buf_size > 0) { chars_printed = dcn10_get_clock_states(dc, pBuf, remaining_buf_size); + pBuf += chars_printed; + remaining_buf_size -= chars_printed; + } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 47dbe4bb294a..5d4772dec0ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -202,7 +202,6 @@ enum dcn10_clk_src_array_id { #define MMHUB_SR(reg_name)\ .reg_name = MMHUB_BASE(mm ## reg_name ## _BASE_IDX) + \ mm ## reg_name - /* macros to expend register list macro defined in HW object header file * end *********************/ @@ -436,7 +435,6 @@ static const struct dcn_optc_mask tg_mask = { TG_COMMON_MASK_SH_LIST_DCN1_0(_MASK) }; - static const struct bios_registers bios_regs = { NBIO_SR(BIOS_SCRATCH_0), NBIO_SR(BIOS_SCRATCH_3), @@ -497,7 +495,6 @@ static const 
struct dce110_clk_src_mask cs_mask = { CS_COMMON_MASK_SH_LIST_DCN1_0(_MASK) }; - static const struct resource_caps res_cap = { .num_timing_generator = 4, .num_opp = 4, @@ -1277,7 +1274,6 @@ static bool construct( goto fail; } } - pool->base.clk_mgr = dcn1_clk_mgr_create(ctx); if (pool->base.clk_mgr == NULL) { dm_error("DC: failed to create display clock!\n"); diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h index beb08fd12b1d..0029a39efb1c 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h +++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h @@ -102,7 +102,7 @@ struct pp_smu_funcs_rv { */ void (*set_display_count)(struct pp_smu *pp, int count); - /* which SMU message? are reader and writer WM separate SMU msg? */ + /* reader and writer WM's are sent together as part of one table*/ /* * PPSMC_MSG_SetDriverDramAddrHigh * PPSMC_MSG_SetDriverDramAddrLow From 47622ba033d6da529ff37d7e5238359a699496ab Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 30 Nov 2018 15:29:43 -0500 Subject: [PATCH 16/60] drm/amdgpu: add a xgmi supported flag Use this to track whether an asic supports xgmi rather than checking the asic type everywhere. Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/soc15.c | 3 +++ 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 8c57924c075f..81e6070d255b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -99,6 +99,7 @@ struct amdgpu_xgmi { unsigned num_physical_nodes; /* gpu list in the same hive */ struct list_head head; + bool supported; }; struct amdgpu_gmc { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index fb37e69f1bba..f8c86d0593dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -94,9 +94,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) int count = 0, ret = -EINVAL; - if ((adev->asic_type < CHIP_VEGA20) || - (adev->flags & AMD_IS_APU) ) + if (!adev->gmc.xgmi.supported) return 0; + adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp); adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3a4e5d8d5162..ed3145b2a596 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -934,7 +934,7 @@ static int gmc_v9_0_sw_init(void *handle) } adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); - if (adev->asic_type == CHIP_VEGA20) { + if (adev->gmc.xgmi.supported) { r = gfxhub_v1_1_get_xgmi_info(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index b318950ebbaa..f2cd87dc365a 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -500,6 +500,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) return -EINVAL; } + if (adev->asic_type == CHIP_VEGA20) + adev->gmc.xgmi.supported = true; + if (adev->flags & AMD_IS_APU) adev->nbio_funcs = &nbio_v7_0_funcs; else if (adev->asic_type == CHIP_VEGA20) From 1245adf31569e117d11b4ac5839e7b6c53d21186 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 7 Nov 2018 09:23:26 
+0100 Subject: [PATCH 17/60] drm/amdgpu: remove amdgpu_bo_backup_to_shadow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is unused. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 47 ---------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 5 --- 2 files changed, 52 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index cf768acb51dc..cc50cb65c212 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -607,53 +607,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev, return r; } -/** - * amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object - * @adev: amdgpu device object - * @ring: amdgpu_ring for the engine handling the buffer operations - * @bo: &amdgpu_bo buffer to be backed up - * @resv: reservation object with embedded fence - * @fence: dma_fence associated with the operation - * @direct: whether to submit the job directly - * - * Copies an &amdgpu_bo buffer object to its shadow object. - * Not used for now. - * - * Returns: - * 0 for success or a negative error code on failure. - */ -int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct reservation_object *resv, - struct dma_fence **fence, - bool direct) - -{ - struct amdgpu_bo *shadow = bo->shadow; - uint64_t bo_addr, shadow_addr; - int r; - - if (!shadow) - return -EINVAL; - - bo_addr = amdgpu_bo_gpu_offset(bo); - shadow_addr = amdgpu_bo_gpu_offset(bo->shadow); - - r = reservation_object_reserve_shared(bo->tbo.resv, 1); - if (r) - goto err; - - r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr, - amdgpu_bo_size(bo), resv, fence, - direct, false); - if (!r) - amdgpu_bo_fence(bo, *fence, true); - -err: - return r; -} - /** * amdgpu_bo_validate - validate an &amdgpu_bo buffer object * @bo: pointer to the buffer object diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 7d3312d0da11..9291c2f837e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -267,11 +267,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared); u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); -int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct reservation_object *resv, - struct dma_fence **fence, bool direct); int amdgpu_bo_validate(struct amdgpu_bo *bo); int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence); From 30da7bb18471327fbeddb976de2b21fd9e34c36a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 26 Sep 2018 14:17:03 +0200 Subject: [PATCH 18/60] drm/amdgpu: add missing error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We ignored the return code here. 
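The change is mechanical; a short sketch of the resulting call site (mirroring the hunk below, shown only to make the pattern explicit):

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
			      VMC_1_0__SRCID__VM_FAULT, &adev->gmc.vm_fault);
	if (r)
		return r;	/* propagate the failure instead of silently continuing */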
Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index ed3145b2a596..016c7aab4a29 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -902,6 +902,9 @@ static int gmc_v9_0_sw_init(void *handle) /* This interrupt is VMC page fault.*/ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT, &adev->gmc.vm_fault); + if (r) + return r; + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, &adev->gmc.vm_fault); From 38cd8a280d725cd4e0be14b0fbc2797c26cd9de5 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Fri, 30 Nov 2018 12:24:33 -0500 Subject: [PATCH 19/60] drm/amdgpu/psp: Update waiting in psp mode1 reset. There is no point in using mdelay unless running from interrupt context (which we are not). mdelay is a busy wait that blocks the CPU for the entirety of the wait time. Also, reduce the wait time to 500ms, as is done in the reference code, because 1s might cause PSP FW timeout issues during XGMI hive reset. Signed-off-by: Andrey Grodzovsky Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index e5dd052d9e06..2b19616cb402 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -547,7 +547,7 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) /*send the mode 1 reset command*/ WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST); - mdelay(1000); + msleep(500); offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 7efb823dd3b1..7357fd56e614 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -592,7 +592,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) /*send the mode 1 reset command*/ WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST); - mdelay(1000); + msleep(500); offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); From a82400b57abb6aff068bb3b21d1cccd63acbb863 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 29 Nov 2018 12:21:53 -0500 Subject: [PATCH 20/60] drm/amdgpu: Handle xgmi device removal. The XGMI hive has some resources allocated on device init which need to be deallocated when the device is unregistered. v2: Remove creation of dedicated wq for XGMI hive reset.
v3: Use the gmc.xgmi.supported flag Signed-off-by: Andrey Grodzovsky Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 20 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 1 + 3 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c75badfa5c4c..bfd286c40631 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1864,6 +1864,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; + if (adev->gmc.xgmi.num_physical_nodes > 1) + amdgpu_xgmi_remove_device(adev); + amdgpu_amdkfd_device_fini(adev); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index f8c86d0593dd..1b15ff3266b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -135,3 +135,23 @@ exit: mutex_unlock(&xgmi_mutex); return ret; } + +void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) +{ + struct amdgpu_hive_info *hive; + + if (!adev->gmc.xgmi.supported) + return; + + mutex_lock(&xgmi_mutex); + + hive = amdgpu_get_xgmi_hive(adev); + if (!hive) + goto exit; + + if (!(hive->number_devices--)) + mutex_destroy(&hive->hive_lock); + +exit: + mutex_unlock(&xgmi_mutex); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 6335bfdcc51d..6151eb9c8ad3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -35,5 +35,6 @@ struct amdgpu_hive_info { struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); int amdgpu_xgmi_add_device(struct amdgpu_device *adev); +void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); #endif From d4535e2c018bba71b49edeb5e396183920f5d341 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Thu, 29 Nov 2018 15:14:27 -0500 Subject: [PATCH 21/60] drm/amdgpu: Implement concurrent asic reset for XGMI. Use per hive wq to concurrently send reset commands to all nodes in the hive. v2: Switch to system_highpri_wq after dropping dedicated queue. Fix non XGMI code path KASAN error. Stop the hive reset for each node loop if there is a reset failure on any of the nodes. 
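A minimal sketch of the fan-out/fan-in flow this introduces (illustrative only; the actual change in the diff below adds an xgmi_reset_work item to struct amdgpu_device and drives it from amdgpu_do_asic_reset()):

	/* fan out: queue one reset work item per node in the hive */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
			r = -EALREADY;		/* a reset is already pending on this node */
	}

	/* fan in: wait for every node to finish, then collect each node's result */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		flush_work(&tmp_adev->xgmi_reset_work);
		if (tmp_adev->asic_reset_res)
			r = tmp_adev->asic_reset_res;
	}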
Signed-off-by: Andrey Grodzovsky Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 44 +++++++++++++++++++--- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c8ad6bf6618a..6fc023bae7fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -910,7 +910,9 @@ struct amdgpu_device { bool in_gpu_reset; struct mutex lock_reset; struct amdgpu_doorbell_index doorbell_index; + int asic_reset_res; + struct work_struct xgmi_reset_work; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bfd286c40631..9fd9f63adc08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2356,6 +2356,19 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) return amdgpu_device_asic_has_dc_support(adev->asic_type); } + +static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) +{ + struct amdgpu_device *adev = + container_of(__work, struct amdgpu_device, xgmi_reset_work); + + adev->asic_reset_res = amdgpu_asic_reset(adev); + if (adev->asic_reset_res) + DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s", + adev->asic_reset_res, adev->ddev->unique); +} + + /** * amdgpu_device_init - initialize the driver * @@ -2454,6 +2467,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, amdgpu_device_delay_enable_gfx_off); + INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); + adev->gfx.gfx_off_req_count = 1; adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false; @@ -3331,10 +3346,31 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, */ if (need_full_reset) { list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - r = amdgpu_asic_reset(tmp_adev); - if (r) - DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s", + /* For XGMI run all resets in parallel to speed up the process */ + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work)) + r = -EALREADY; + } else + r = amdgpu_asic_reset(tmp_adev); + + if (r) { + DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s", r, tmp_adev->ddev->unique); + break; + } + } + + /* For XGMI wait for all PSP resets to complete before proceed */ + if (!r) { + list_for_each_entry(tmp_adev, device_list_handle, + gmc.xgmi.head) { + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + flush_work(&tmp_adev->xgmi_reset_work); + r = tmp_adev->asic_reset_res; + if (r) + break; + } + } } } @@ -3521,8 +3557,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ if (tmp_adev == adev) continue; - dev_info(tmp_adev->dev, "GPU reset begin for drm dev %s!\n", adev->ddev->unique); - amdgpu_device_lock_adev(tmp_adev); r = amdgpu_device_pre_asic_reset(tmp_adev, NULL, From 5d66ef38bc9df8964ec7b6eb5091dae5e8fe67b4 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Mon, 3 Dec 2018 15:00:39 -0500 Subject: [PATCH 22/60] drm/amdgpu: Update XGMI node print amdgpu_xgmi_update_topology is called both on device registration and reset. Fix misleading print since the device is added only once to the hive on registration and not on reset. 
Signed-off-by: Andrey Grodzovsky Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 1b15ff3266b1..0b263a9857c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -78,7 +78,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret); else - dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n", + dev_info(adev->dev, "XGMI: Set topology for node %d, hive 0x%llx.\n", adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); From 0cf5eb76e2b453b3d159082eb4707c03a1686cf7 Mon Sep 17 00:00:00 2001 From: David Francis Date: Fri, 30 Nov 2018 09:57:06 -0500 Subject: [PATCH 23/60] drm/amd/display: Add tracing to dc [Why] Tracing is a useful and cheap debug functionality [How] This creates a new trace system amdgpu_dm, currently with three trace events amdgpu_dc_rreg and amdgpu_dc_wreg report the address and value of any dc register reads and writes amdgpu_dc_performance requires at least one of those two to be enabled. It counts the register reads and writes since the last entry v2: Don't check for NULL before kfree Signed-off-by: David Francis Reviewed-by: Harry Wentland Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 + .../amd/display/amdgpu_dm/amdgpu_dm_trace.h | 104 ++++++++++++++++++ drivers/gpu/drm/amd/display/dc/core/dc.c | 19 ++++ drivers/gpu/drm/amd/display/dc/dc_types.h | 8 ++ .../amd/display/dc/dcn10/dcn10_cm_common.c | 4 +- drivers/gpu/drm/amd/display/dc/dm_services.h | 12 +- 6 files changed, 146 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 55e9b1249dd4..943d1ae1de15 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -23,6 +23,9 @@ * */ +/* The caprices of the preprocessor require that this be declared right here */ +#define CREATE_TRACE_POINTS + #include "dm_services_types.h" #include "dc.h" #include "dc/inc/core_types.h" diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h new file mode 100644 index 000000000000..d898981684d5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h @@ -0,0 +1,104 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM amdgpu_dm + +#if !defined(_AMDGPU_DM_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _AMDGPU_DM_TRACE_H_ + +#include + +TRACE_EVENT(amdgpu_dc_rreg, + TP_PROTO(unsigned long *read_count, uint32_t reg, uint32_t value), + TP_ARGS(read_count, reg, value), + TP_STRUCT__entry( + __field(uint32_t, reg) + __field(uint32_t, value) + ), + TP_fast_assign( + __entry->reg = reg; + __entry->value = value; + *read_count = *read_count + 1; + ), + TP_printk("reg=0x%08lx, value=0x%08lx", + (unsigned long)__entry->reg, + (unsigned long)__entry->value) +); + +TRACE_EVENT(amdgpu_dc_wreg, + TP_PROTO(unsigned long *write_count, uint32_t reg, uint32_t value), + TP_ARGS(write_count, reg, value), + TP_STRUCT__entry( + __field(uint32_t, reg) + __field(uint32_t, value) + ), + TP_fast_assign( + __entry->reg = reg; + __entry->value = value; + *write_count = *write_count + 1; + ), + TP_printk("reg=0x%08lx, value=0x%08lx", + (unsigned long)__entry->reg, + (unsigned long)__entry->value) +); + + +TRACE_EVENT(amdgpu_dc_performance, + TP_PROTO(unsigned long read_count, unsigned long write_count, + unsigned long *last_read, unsigned long *last_write, + const char *func, unsigned int line), + TP_ARGS(read_count, write_count, last_read, last_write, func, line), + TP_STRUCT__entry( + __field(uint32_t, reads) + __field(uint32_t, writes) + __field(uint32_t, read_delta) + __field(uint32_t, write_delta) + __string(func, func) + __field(uint32_t, line) + ), + TP_fast_assign( + __entry->reads = read_count; + __entry->writes = write_count; + __entry->read_delta = read_count - *last_read; + __entry->write_delta = write_count - *last_write; + __assign_str(func, func); + __entry->line = line; + *last_read = read_count; + *last_write = write_count; + ), + TP_printk("%s:%d reads=%08ld (%08ld total), writes=%08ld (%08ld total)", + __get_str(func), __entry->line, + (unsigned long)__entry->read_delta, + (unsigned long)__entry->reads, + (unsigned long)__entry->write_delta, + (unsigned long)__entry->writes) +); +#endif /* _AMDGPU_DM_TRACE_H_ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
+#define TRACE_INCLUDE_FILE amdgpu_dm_trace +#include diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 8a182cb35c8f..d9c57984394b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -224,6 +224,17 @@ failed_alloc: return false; } +static struct dc_perf_trace *dc_perf_trace_create(void) +{ + return kzalloc(sizeof(struct dc_perf_trace), GFP_KERNEL); +} + +static void dc_perf_trace_destroy(struct dc_perf_trace **perf_trace) +{ + kfree(*perf_trace); + *perf_trace = NULL; +} + /** ***************************************************************************** * Function: dc_stream_adjust_vmin_vmax @@ -585,6 +596,8 @@ static void destruct(struct dc *dc) if (dc->ctx->created_bios) dal_bios_parser_destroy(&dc->ctx->dc_bios); + dc_perf_trace_destroy(&dc->ctx->perf_trace); + kfree(dc->ctx); dc->ctx = NULL; @@ -708,6 +721,12 @@ static bool construct(struct dc *dc, goto fail; } + dc_ctx->perf_trace = dc_perf_trace_create(); + if (!dc_ctx->perf_trace) { + ASSERT_CRITICAL(false); + goto fail; + } + /* Create GPIO service */ dc_ctx->gpio_service = dal_gpio_service_create( dc_version, diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 91911ef8d746..0b20ae23f169 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -73,10 +73,18 @@ struct hw_asic_id { void *atombios_base_address; }; +struct dc_perf_trace { + unsigned long read_count; + unsigned long write_count; + unsigned long last_entry_read; + unsigned long last_entry_write; +}; + struct dc_context { struct dc *dc; void *driver_context; /* e.g. amdgpu_device */ + struct dc_perf_trace *perf_trace; void *cgs_device; enum dce_environment dce_environment; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 3eea44092a04..7469333a2c8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -324,7 +324,7 @@ bool cm_helper_translate_curve_to_hw_format( if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) return false; - PERF_TRACE(); + PERF_TRACE_CTX(output_tf->ctx); corner_points = lut_params->corner_points; rgb_resulted = lut_params->rgb_resulted; @@ -513,7 +513,7 @@ bool cm_helper_translate_curve_to_degamma_hw_format( if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) return false; - PERF_TRACE(); + PERF_TRACE_CTX(output_tf->ctx); corner_points = lut_params->corner_points; rgb_resulted = lut_params->rgb_resulted; diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h index 28128c02de00..1961cc6d9143 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services.h @@ -31,6 +31,8 @@ #define __DM_SERVICES_H__ +#include "amdgpu_dm_trace.h" + /* TODO: remove when DC is complete. 
*/ #include "dm_services_types.h" #include "logger_interface.h" @@ -70,6 +72,7 @@ static inline uint32_t dm_read_reg_func( } #endif value = cgs_read_register(ctx->cgs_device, address); + trace_amdgpu_dc_rreg(&ctx->perf_trace->read_count, address, value); return value; } @@ -90,6 +93,7 @@ static inline void dm_write_reg_func( } #endif cgs_write_register(ctx->cgs_device, address, value); + trace_amdgpu_dc_wreg(&ctx->perf_trace->write_count, address, value); } static inline uint32_t dm_read_index_reg( @@ -351,8 +355,12 @@ unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx, /* * performance tracing */ -void dm_perf_trace_timestamp(const char *func_name, unsigned int line); -#define PERF_TRACE() dm_perf_trace_timestamp(__func__, __LINE__) +#define PERF_TRACE() trace_amdgpu_dc_performance(CTX->perf_trace->read_count,\ + CTX->perf_trace->write_count, &CTX->perf_trace->last_entry_read,\ + &CTX->perf_trace->last_entry_write, __func__, __LINE__) +#define PERF_TRACE_CTX(__CTX) trace_amdgpu_dc_performance(__CTX->perf_trace->read_count,\ + __CTX->perf_trace->write_count, &__CTX->perf_trace->last_entry_read,\ + &__CTX->perf_trace->last_entry_write, __func__, __LINE__) /* From 7d98e1e7ee9c8a083936547eadc2b0e46d43cb96 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 Nov 2018 19:20:28 -0500 Subject: [PATCH 24/60] drm/amdgpu/powerplay: fix mclk switch limit on polaris Update switch limit on newer polaris variants. This may fix flickering with high refresh rates with mclk switching enabled. Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 66e3d0177235..6bfbfd37ed92 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2859,7 +2859,10 @@ static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr, case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: - switch_limit_us = data->is_memory_gddr5 ? 190 : 150; + if (hwmgr->is_kicker) + switch_limit_us = data->is_memory_gddr5 ? 450 : 150; + else + switch_limit_us = data->is_memory_gddr5 ? 190 : 150; break; case CHIP_VEGAM: switch_limit_us = 30; From de4aaab5cc9770a8c4dc13d9bfb6a83b06bba57e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 Nov 2018 19:22:07 -0500 Subject: [PATCH 25/60] drm/amdgpu/powerplay: fix clock stretcher limits on polaris (v2) Adjust limits for newer polaris variants. 
v2: fix polaris11 kicker (Jerry) Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/smumgr/polaris10_smumgr.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 2b2c26616902..94898b2da282 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -1528,8 +1528,21 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) efuse = efuse >> 24; if (hwmgr->chip_id == CHIP_POLARIS10) { - min = 1000; - max = 2300; + if (hwmgr->is_kicker) { + min = 1200; + max = 2500; + } else { + min = 1000; + max = 2300; + } + } else if (hwmgr->chip_id == CHIP_POLARIS11) { + if (hwmgr->is_kicker) { + min = 900; + max = 2100; + } else { + min = 1100; + max = 2100; + } } else { min = 1100; max = 2100; From 223577753b54acf0033de9585340909a0ef05e68 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 2 Dec 2018 21:47:42 -0500 Subject: [PATCH 26/60] drm/amdgpu/si: fix SI after doorbell rework SI does not use doorbells, move asic doorbell init later asic check. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=108920 Reviewed-by: Oak Zeng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9fd9f63adc08..ef36cc595985 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -515,7 +515,6 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) */ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) { - amdgpu_asic_init_doorbell_index(adev); /* No doorbell on SI hardware generation */ if (adev->asic_type < CHIP_BONAIRE) { @@ -529,6 +528,8 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) return -EINVAL; + amdgpu_asic_init_doorbell_index(adev); + /* doorbell bar mapping */ adev->doorbell.base = pci_resource_start(adev->pdev, 2); adev->doorbell.size = pci_resource_len(adev->pdev, 2); From d4295e12796e747f9a624a56cd54de51fb2b3bdd Mon Sep 17 00:00:00 2001 From: Leo Li Date: Thu, 22 Nov 2018 09:39:17 -0500 Subject: [PATCH 27/60] drm/amd/include: Add mmhub 9.4 reg offsets and shift-mask In particular, we need the mmMC_VM_XGMI_LFB_CNTL register, for determining if xGMI is enabled on VG20. This will be used by DC to determine the correct spread spectrum adjustment for display and audio clocks. Reviewed-by: Alex Deucher Signed-off-by: Leo Li Signed-off-by: Alex Deucher --- .../asic_reg/mmhub/mmhub_9_4_0_offset.h | 32 +++++++++++++++++ .../asic_reg/mmhub/mmhub_9_4_0_sh_mask.h | 35 +++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h new file mode 100644 index 000000000000..8f515875a34d --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _mmhub_9_4_0_OFFSET_HEADER +#define _mmhub_9_4_0_OFFSET_HEADER + + +// addressBlock: mmhub_utcl2_vmsharedpfdec +// base address: 0x6a040 +#define mmMC_VM_XGMI_LFB_CNTL 0x0823 +#define mmMC_VM_XGMI_LFB_CNTL_BASE_IDX 0 +#define mmMC_VM_XGMI_LFB_SIZE 0x0824 +#define mmMC_VM_XGMI_LFB_SIZE_BASE_IDX 0 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h new file mode 100644 index 000000000000..0a6b072d191e --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _mmhub_9_4_0_SH_MASK_HEADER +#define _mmhub_9_4_0_SH_MASK_HEADER + + +// addressBlock: mmhub_utcl2_vmsharedpfdec +//MC_VM_XGMI_LFB_CNTL +#define MC_VM_XGMI_LFB_CNTL__PF_LFB_REGION__SHIFT 0x0 +#define MC_VM_XGMI_LFB_CNTL__PF_MAX_REGION__SHIFT 0x4 +#define MC_VM_XGMI_LFB_CNTL__PF_LFB_REGION_MASK 0x00000007L +#define MC_VM_XGMI_LFB_CNTL__PF_MAX_REGION_MASK 0x00000070L +//MC_VM_XGMI_LFB_SIZE +#define MC_VM_XGMI_LFB_SIZE__PF_LFB_SIZE__SHIFT 0x0 +#define MC_VM_XGMI_LFB_SIZE__PF_LFB_SIZE_MASK 0x0000FFFFL + +#endif From 8288b2e5ae01cabd078836618fd651574343cbfc Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Wed, 5 Dec 2018 15:43:19 +0800 Subject: [PATCH 28/60] drm/amdgpu/acpi: NULL check before some freeing functions is not needed kfree(NULL) is safe, so removes NULL check before freeing the mem. This patch also fix the ifnullfree.cocci warnings. Reviewed-by: Lyude Paul Signed-off-by: Wen Yang CC: Alex Deucher CC: christian.koenig@amd.com CC: "David (ChunMing) Zhou" CC: David Airlie (maintainer:DRM DRIVERS) CC: Lyude Paul CC: Rex Zhu CC: Jim Qu CC: amd-gfx@lists.freedesktop.org CC: dri-devel@lists.freedesktop.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 47db65926d71..4376b17ca594 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -886,6 +886,5 @@ void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev, void amdgpu_acpi_fini(struct amdgpu_device *adev) { unregister_acpi_notifier(&adev->acpi_nb); - if (adev->atif) - kfree(adev->atif); + kfree(adev->atif); } From 9afd07566b6c908324cb4072102e2ce96bce986a Mon Sep 17 00:00:00 2001 From: Sharat Masetty Date: Thu, 29 Nov 2018 15:35:19 +0530 Subject: [PATCH 29/60] drm/scheduler: Set sched->thread to NULL on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In cases where the scheduler instance is used as a base object of another driver object, it's not clear if the driver can call scheduler cleanup on the fail path. So, Set the sched->thread to NULL, so that the driver can safely call drm_sched_fini() during cleanup. 
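A minimal sketch of the driver-side error path this makes safe (the my_ring/my_sched_ops names and parameters are hypothetical; only the drm_sched_* calls are from the scheduler API as it stands in this series):

	int my_ring_init(struct my_ring *ring)
	{
		int r;

		r = drm_sched_init(&ring->sched, &my_sched_ops, hw_submission,
				   hang_limit, timeout, ring->name);
		if (r) {
			/*
			 * sched->thread is now guaranteed to be NULL on failure,
			 * so a shared cleanup path may call drm_sched_fini()
			 * unconditionally; its kthread_stop() is simply skipped.
			 */
			my_ring_fini(ring);	/* ends up calling drm_sched_fini(&ring->sched) */
			return r;
		}
		return 0;
	}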
Signed-off-by: Sharat Masetty Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/sched_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 9d4cd196037a..05b803d1248d 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -612,7 +612,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, long timeout, const char *name) { - int i; + int i, ret; sched->ops = ops; sched->hw_submission_limit = hw_submission; sched->name = name; @@ -633,8 +633,10 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, /* Each scheduler will run on a seperate kernel thread */ sched->thread = kthread_run(drm_sched_main, sched, sched->name); if (IS_ERR(sched->thread)) { + ret = PTR_ERR(sched->thread); + sched->thread = NULL; DRM_ERROR("Failed to create scheduler for %s.\n", name); - return PTR_ERR(sched->thread); + return ret; } sched->ready = true; From 1db8c142b6c557a951e8f9866b98953fe91cbdd6 Mon Sep 17 00:00:00 2001 From: Sharat Masetty Date: Thu, 29 Nov 2018 15:35:20 +0530 Subject: [PATCH 30/60] drm/scheduler: Add drm_sched_suspend/resume_timeout() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds two new functions to help client drivers suspend and resume the scheduler job timeout. This can be useful in cases where the hardware has preemption support enabled. Using this, it is possible to have the timeout active only for the ring which is active on the ringbuffer. This patch also makes the job_list_lock IRQ safe. Suggested-by: Christian Koenig Signed-off-by: Sharat Masetty Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/etnaviv/etnaviv_dump.c | 9 +-- drivers/gpu/drm/scheduler/sched_main.c | 85 ++++++++++++++++++++++---- include/drm/gpu_scheduler.h | 4 ++ 3 files changed, 82 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index 9146e30e24a6..fd6bad2100cf 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -118,6 +118,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) unsigned int n_obj, n_bomap_pages; size_t file_size, mmu_size; __le64 *bomap, *bomap_start; + unsigned long flags; /* Only catch the first event, or when manually re-armed */ if (!etnaviv_dump_core) @@ -134,13 +135,13 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) mmu_size + gpu->buffer.size; /* Add in the active command buffers */ - spin_lock(&gpu->sched.job_list_lock); + spin_lock_irqsave(&sched->job_list_lock, flags); list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) { submit = to_etnaviv_submit(s_job); file_size += submit->cmdbuf.size; n_obj++; } - spin_unlock(&gpu->sched.job_list_lock); + spin_unlock_irqrestore(&sched->job_list_lock, flags); /* Add in the active buffer objects */ list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) { @@ -182,14 +183,14 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) gpu->buffer.size, etnaviv_cmdbuf_get_va(&gpu->buffer)); - spin_lock(&gpu->sched.job_list_lock); + spin_lock_irqsave(&sched->job_list_lock, flags); list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) { submit = to_etnaviv_submit(s_job); etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD, submit->cmdbuf.vaddr, submit->cmdbuf.size, etnaviv_cmdbuf_get_va(&submit->cmdbuf)); } - spin_unlock(&gpu->sched.job_list_lock); + 
spin_unlock_irqrestore(&sched->job_list_lock, flags); /* Reserve space for the bomap */ if (n_bomap_pages) { diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 05b803d1248d..dbb69063b3d5 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -211,6 +211,62 @@ void drm_sched_fault(struct drm_gpu_scheduler *sched) } EXPORT_SYMBOL(drm_sched_fault); +/** + * drm_sched_suspend_timeout - Suspend scheduler job timeout + * + * @sched: scheduler instance for which to suspend the timeout + * + * Suspend the delayed work timeout for the scheduler. This is done by + * modifying the delayed work timeout to an arbitrary large value, + * MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be + * called from an IRQ context. + * + * Returns the timeout remaining + * + */ +unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched) +{ + unsigned long sched_timeout, now = jiffies; + + sched_timeout = sched->work_tdr.timer.expires; + + /* + * Modify the timeout to an arbitrarily large value. This also prevents + * the timeout to be restarted when new submissions arrive + */ + if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT) + && time_after(sched_timeout, now)) + return sched_timeout - now; + else + return sched->timeout; +} +EXPORT_SYMBOL(drm_sched_suspend_timeout); + +/** + * drm_sched_resume_timeout - Resume scheduler job timeout + * + * @sched: scheduler instance for which to resume the timeout + * @remaining: remaining timeout + * + * Resume the delayed work timeout for the scheduler. Note that + * this function can be called from an IRQ context. + */ +void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched, + unsigned long remaining) +{ + unsigned long flags; + + spin_lock_irqsave(&sched->job_list_lock, flags); + + if (list_empty(&sched->ring_mirror_list)) + cancel_delayed_work(&sched->work_tdr); + else + mod_delayed_work(system_wq, &sched->work_tdr, remaining); + + spin_unlock_irqrestore(&sched->job_list_lock, flags); +} +EXPORT_SYMBOL(drm_sched_resume_timeout); + /* job_finish is called after hw fence signaled */ static void drm_sched_job_finish(struct work_struct *work) @@ -218,6 +274,7 @@ static void drm_sched_job_finish(struct work_struct *work) struct drm_sched_job *s_job = container_of(work, struct drm_sched_job, finish_work); struct drm_gpu_scheduler *sched = s_job->sched; + unsigned long flags; /* * Canceling the timeout without removing our job from the ring mirror @@ -228,12 +285,12 @@ static void drm_sched_job_finish(struct work_struct *work) */ cancel_delayed_work_sync(&sched->work_tdr); - spin_lock(&sched->job_list_lock); + spin_lock_irqsave(&sched->job_list_lock, flags); /* remove job from ring_mirror_list */ list_del_init(&s_job->node); /* queue TDR for next job */ drm_sched_start_timeout(sched); - spin_unlock(&sched->job_list_lock); + spin_unlock_irqrestore(&sched->job_list_lock, flags); sched->ops->free_job(s_job); } @@ -249,20 +306,22 @@ static void drm_sched_job_finish_cb(struct dma_fence *f, static void drm_sched_job_begin(struct drm_sched_job *s_job) { struct drm_gpu_scheduler *sched = s_job->sched; + unsigned long flags; dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb, drm_sched_job_finish_cb); - spin_lock(&sched->job_list_lock); + spin_lock_irqsave(&sched->job_list_lock, flags); list_add_tail(&s_job->node, &sched->ring_mirror_list); drm_sched_start_timeout(sched); - spin_unlock(&sched->job_list_lock); + 
spin_unlock_irqrestore(&sched->job_list_lock, flags); } static void drm_sched_job_timedout(struct work_struct *work) { struct drm_gpu_scheduler *sched; struct drm_sched_job *job; + unsigned long flags; sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); job = list_first_entry_or_null(&sched->ring_mirror_list, @@ -271,9 +330,9 @@ static void drm_sched_job_timedout(struct work_struct *work) if (job) job->sched->ops->timedout_job(job); - spin_lock(&sched->job_list_lock); + spin_lock_irqsave(&sched->job_list_lock, flags); drm_sched_start_timeout(sched); - spin_unlock(&sched->job_list_lock); + spin_unlock_irqrestore(&sched->job_list_lock, flags); } /** @@ -287,9 +346,10 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo { struct drm_sched_job *s_job; struct drm_sched_entity *entity, *tmp; + unsigned long flags; int i; - spin_lock(&sched->job_list_lock); + spin_lock_irqsave(&sched->job_list_lock, flags); list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) { if (s_job->s_fence->parent && dma_fence_remove_callback(s_job->s_fence->parent, @@ -299,7 +359,7 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo atomic_dec(&sched->hw_rq_count); } } - spin_unlock(&sched->job_list_lock); + spin_unlock_irqrestore(&sched->job_list_lock, flags); if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { atomic_inc(&bad->karma); @@ -337,9 +397,10 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) { struct drm_sched_job *s_job, *tmp; bool found_guilty = false; + unsigned long flags; int r; - spin_lock(&sched->job_list_lock); + spin_lock_irqsave(&sched->job_list_lock, flags); list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { struct drm_sched_fence *s_fence = s_job->s_fence; struct dma_fence *fence; @@ -353,7 +414,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) if (found_guilty && s_job->s_fence->scheduled.context == guilty_context) dma_fence_set_error(&s_fence->finished, -ECANCELED); - spin_unlock(&sched->job_list_lock); + spin_unlock_irqrestore(&sched->job_list_lock, flags); fence = sched->ops->run_job(s_job); atomic_inc(&sched->hw_rq_count); @@ -372,10 +433,10 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) drm_sched_expel_job_unlocked(s_job); drm_sched_process_job(NULL, &s_fence->cb); } - spin_lock(&sched->job_list_lock); + spin_lock_irqsave(&sched->job_list_lock, flags); } drm_sched_start_timeout(sched); - spin_unlock(&sched->job_list_lock); + spin_unlock_irqrestore(&sched->job_list_lock, flags); } EXPORT_SYMBOL(drm_sched_job_recovery); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 926379d53484..47e19796c450 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -331,4 +331,8 @@ struct drm_sched_fence *drm_sched_fence_create( void drm_sched_fence_scheduled(struct drm_sched_fence *fence); void drm_sched_fence_finished(struct drm_sched_fence *fence); +unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched); +void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched, + unsigned long remaining); + #endif From c6296f5a658974169261ed8fe75887e7552174af Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 Nov 2018 19:34:27 -0500 Subject: [PATCH 31/60] drm/amdgpu/powerplay: update smu7_ppsmc.h Add new messages for polaris. 
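These are only message IDs; a sketch of how a caller would use them (mc_fw_supports_ffc is a placeholder condition, the real check against the MC firmware is wired up in the next patch of this series):

	if (mc_fw_supports_ffc)
		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableFFC);
	else
		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableFFC);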
Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h index 62f36ba2435b..d11d6a797ce4 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h @@ -395,6 +395,9 @@ typedef uint16_t PPSMC_Result; #define PPSMC_MSG_SetVBITimeout ((uint16_t) 0x306) +#define PPSMC_MSG_EnableFFC ((uint16_t) 0x307) +#define PPSMC_MSG_DisableFFC ((uint16_t) 0x308) + #define PPSMC_MSG_EnableDpmDidt ((uint16_t) 0x309) #define PPSMC_MSG_DisableDpmDidt ((uint16_t) 0x30A) From 34c08da2097abe99cdc4757194f10a0c51148ce3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 Nov 2018 19:35:14 -0500 Subject: [PATCH 32/60] drm/amdgpu/powerplay: check MC firmware for FFC support Check if the MC firmware supports FFC and tell the SMC so mclk switching is handled properly. Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 6bfbfd37ed92..d94c7d03bf24 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -4222,9 +4222,17 @@ static int smu7_check_mc_firmware(struct pp_hwmgr *hwmgr) if (tmp & (1 << 23)) { data->mem_latency_high = MEM_LATENCY_HIGH; data->mem_latency_low = MEM_LATENCY_LOW; + if ((hwmgr->chip_id == CHIP_POLARIS10) || + (hwmgr->chip_id == CHIP_POLARIS11) || + (hwmgr->chip_id == CHIP_POLARIS12)) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableFFC); } else { data->mem_latency_high = 330; data->mem_latency_low = 330; + if ((hwmgr->chip_id == CHIP_POLARIS10) || + (hwmgr->chip_id == CHIP_POLARIS11) || + (hwmgr->chip_id == CHIP_POLARIS12)) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableFFC); } return 0; From c50fe0c5b174153669fd437c1c5fde724b3bc7f4 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 5 Dec 2018 11:07:55 +0800 Subject: [PATCH 33/60] drm/amdgpu: both support PCO FP5/AM4 rlc fw For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin For Picasso && FP5 SOCKET board, we use picasso_rlc.bin Judgment method: PCO AM4: revision >= 0xC8 && revision <= 0xCF or revision >= 0xD8 && revision <= 0xDF otherwise is PCO FP5 Signed-off-by: Aaron Liu Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index af8ccb014be3..f62d570a81a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -86,6 +86,7 @@ MODULE_FIRMWARE("amdgpu/picasso_me.bin"); MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); +MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); @@ -645,7 +646,20 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + /* + * For Picasso && AM4 SOCKET 
board, we use picasso_rlc_am4.bin + * instead of picasso_rlc.bin. + * Judgment method: + * PCO AM4: revision >= 0xC8 && revision <= 0xCF + * or revision >= 0xD8 && revision <= 0xDF + * otherwise is PCO FP5 + */ + if (!strcmp(chip_name, "picasso") && + (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || + ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; From a9f34c70fd168b164aadffd46bb757ded52e25b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 19 Sep 2018 16:25:08 +0200 Subject: [PATCH 34/60] drm/ttm: allow reserving more than one shared slot v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's support simultaneous submissions to multiple engines. v2: rename the field to num_shared and fix up all users v3: rebased Signed-off-by: Christian König Reviewed-by: Michel Dänzer Reviewed-by: Junwei Zhang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- drivers/gpu/drm/qxl/qxl_release.c | 2 +- drivers/gpu/drm/radeon/radeon_cs.c | 4 ++-- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- drivers/gpu/drm/radeon/radeon_vm.c | 4 ++-- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 12 +++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 10 +++++----- drivers/gpu/drm/vmwgfx/vmwgfx_validation.c | 2 +- include/drm/ttm/ttm_execbuf_util.h | 4 ++-- 14 files changed, 35 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f3129b912714..b29ef088fa14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -535,7 +535,7 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, struct amdgpu_bo *bo = mem->bo; INIT_LIST_HEAD(&entry->head); - entry->shared = true; + entry->num_shared = 1; entry->bo = &bo->tbo; mutex_lock(&process_info->lock); if (userptr) @@ -676,7 +676,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; - ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.tv.num_shared = 1; ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); @@ -740,7 +740,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; - ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.tv.num_shared = 1; ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); @@ -1830,7 +1830,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) validate_list.head) { list_add_tail(&mem->resv_list.head, &resv_list); mem->resv_list.bo = mem->validate_list.bo; - mem->resv_list.shared = mem->validate_list.shared; + mem->resv_list.num_shared = mem->validate_list.num_shared; } /* Reserve all BOs and page tables for validation */ @@ -2049,7 +2049,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) list_add_tail(&mem->resv_list.head, &ctx.list); 
mem->resv_list.bo = mem->validate_list.bo; - mem->resv_list.shared = mem->validate_list.shared; + mem->resv_list.num_shared = mem->validate_list.num_shared; } ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 14d2982a47cc..b75d30ee80c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -118,7 +118,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, entry->priority = min(info[i].bo_priority, AMDGPU_BO_LIST_MAX_PRIORITY); entry->tv.bo = &bo->tbo; - entry->tv.shared = !bo->prime_shared_count; + entry->tv.num_shared = !bo->prime_shared_count; if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) list->gds_obj = bo; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 024dfbd87f11..a4b2ac541475 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -50,7 +50,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &bo->tbo; - p->uf_entry.tv.shared = true; + p->uf_entry.tv.num_shared = 1; p->uf_entry.user_pages = NULL; drm_gem_object_put_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 5b550706ee76..7e22be7ca68a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -74,7 +74,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&csa_tv.head); csa_tv.bo = &bo->tbo; - csa_tv.shared = true; + csa_tv.num_shared = 1; list_add(&csa_tv.head, &list); amdgpu_vm_get_pd_bo(vm, &list, &pd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 7b3d1ebda9df..f4f00217546e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -169,7 +169,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, INIT_LIST_HEAD(&duplicates); tv.bo = &bo->tbo; - tv.shared = true; + tv.num_shared = 1; list_add(&tv.head, &list); amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); @@ -604,7 +604,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -ENOENT; abo = gem_to_amdgpu_bo(gobj); tv.bo = &abo->tbo; - tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID); + if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) + tv.num_shared = 1; + else + tv.num_shared = 0; list_add(&tv.head, &list); } else { gobj = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 58a2363040dd..39fb5d24cc51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -617,7 +617,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, { entry->priority = 0; entry->tv.bo = &vm->root.base.bo->tbo; - entry->tv.shared = true; + entry->tv.num_shared = 1; entry->user_pages = NULL; list_add(&entry->tv.head, validated); } diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 3813ec198900..18030e2be71f 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -217,7 +217,7 @@ int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo) qxl_bo_ref(bo); entry->tv.bo = &bo->tbo; - entry->tv.shared = false; + entry->tv.num_shared = 
0; list_add_tail(&entry->tv.head, &release->bos); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 1ae31dbc61c6..f43305329939 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -178,7 +178,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) } p->relocs[i].tv.bo = &p->relocs[i].robj->tbo; - p->relocs[i].tv.shared = !r->write_domain; + p->relocs[i].tv.num_shared = !r->write_domain; radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head, priority); @@ -253,7 +253,7 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser *p) resv = reloc->robj->tbo.resv; r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, - reloc->tv.shared); + reloc->tv.num_shared); if (r) return r; } diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 27d8e7dd2d06..44617dec8183 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -552,7 +552,7 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev, INIT_LIST_HEAD(&list); tv.bo = &bo_va->bo->tbo; - tv.shared = true; + tv.num_shared = 1; list_add(&tv.head, &list); vm_bos = radeon_vm_get_bos(rdev, bo_va->vm, &list); diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index a3d2ca07a058..0d374211661c 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -142,7 +142,7 @@ struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, list[0].preferred_domains = RADEON_GEM_DOMAIN_VRAM; list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM; list[0].tv.bo = &vm->page_directory->tbo; - list[0].tv.shared = true; + list[0].tv.num_shared = 1; list[0].tiling_flags = 0; list_add(&list[0].tv.head, head); @@ -154,7 +154,7 @@ struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, list[idx].preferred_domains = RADEON_GEM_DOMAIN_VRAM; list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM; list[idx].tv.bo = &list[idx].robj->tbo; - list[idx].tv.shared = true; + list[idx].tv.num_shared = 1; list[idx].tiling_flags = 0; list_add(&list[idx++].tv.head, head); } diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index efa005a1c1b7..93860346c426 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -126,10 +126,11 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, } if (!ret) { - if (!entry->shared) + if (!entry->num_shared) continue; - ret = reservation_object_reserve_shared(bo->resv, 1); + ret = reservation_object_reserve_shared(bo->resv, + entry->num_shared); if (!ret) continue; } @@ -150,8 +151,9 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, } } - if (!ret && entry->shared) - ret = reservation_object_reserve_shared(bo->resv, 1); + if (!ret && entry->num_shared) + ret = reservation_object_reserve_shared(bo->resv, + entry->num_shared); if (unlikely(ret != 0)) { if (ret == -EINTR) @@ -199,7 +201,7 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, list_for_each_entry(entry, list, head) { bo = entry->bo; - if (entry->shared) + if (entry->num_shared) reservation_object_add_shared_fence(bo->resv, fence); else reservation_object_add_excl_fence(bo->resv, fence); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 8a029bade32a..3025bfc001a1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -85,7 
+85,7 @@ static void vmw_resource_release(struct kref *kref) struct ttm_validate_buffer val_buf; val_buf.bo = bo; - val_buf.shared = false; + val_buf.num_shared = 0; res->func->unbind(res, false, &val_buf); } res->backup_dirty = false; @@ -462,7 +462,7 @@ vmw_resource_check_buffer(struct ww_acquire_ctx *ticket, INIT_LIST_HEAD(&val_list); val_buf->bo = ttm_bo_reference(&res->backup->base); - val_buf->shared = false; + val_buf->num_shared = 0; list_add_tail(&val_buf->head, &val_list); ret = ttm_eu_reserve_buffers(ticket, &val_list, interruptible, NULL); if (unlikely(ret != 0)) @@ -565,7 +565,7 @@ static int vmw_resource_do_evict(struct ww_acquire_ctx *ticket, BUG_ON(!func->may_evict); val_buf.bo = NULL; - val_buf.shared = false; + val_buf.num_shared = 0; ret = vmw_resource_check_buffer(ticket, res, interruptible, &val_buf); if (unlikely(ret != 0)) return ret; @@ -614,7 +614,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr) return 0; val_buf.bo = NULL; - val_buf.shared = false; + val_buf.num_shared = 0; if (res->backup) val_buf.bo = &res->backup->base; do { @@ -685,7 +685,7 @@ void vmw_resource_unbind_list(struct vmw_buffer_object *vbo) struct vmw_resource *res, *next; struct ttm_validate_buffer val_buf = { .bo = &vbo->base, - .shared = false + .num_shared = 0 }; lockdep_assert_held(&vbo->base.resv->lock.base); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index 184025fa938e..fef22753f4de 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -266,7 +266,7 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx, val_buf->bo = ttm_bo_get_unless_zero(&vbo->base); if (!val_buf->bo) return -ESRCH; - val_buf->shared = false; + val_buf->num_shared = 0; list_add_tail(&val_buf->head, &ctx->bo_list); bo_node->as_mob = as_mob; bo_node->cpu_blit = cpu_blit; diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h index b0fdd1980034..621615fa7728 100644 --- a/include/drm/ttm/ttm_execbuf_util.h +++ b/include/drm/ttm/ttm_execbuf_util.h @@ -40,13 +40,13 @@ * * @head: list head for thread-private list. * @bo: refcounted buffer object pointer. - * @shared: should the fence be added shared? + * @num_shared: How many shared fences we want to add. */ struct ttm_validate_buffer { struct list_head head; struct ttm_buffer_object *bo; - bool shared; + unsigned int num_shared; }; /** From 049aca4363d8af87cab8d53de5401602db3b9999 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 19 Sep 2018 16:54:35 +0200 Subject: [PATCH 35/60] drm/amdgpu: fix using shared fence for exported BOs v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is perfectly possible that the BO list is created before the BO is exported. While at it clean up setting shared to one instead of true. 
v2: add comment and simplify logic Signed-off-by: Christian König Reviewed-by: Michel Dänzer Reviewed-by: Huang Rui Acked-by: Junwei Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 13 +++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index b75d30ee80c6..5c79da8e1150 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -118,7 +118,6 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, entry->priority = min(info[i].bo_priority, AMDGPU_BO_LIST_MAX_PRIORITY); entry->tv.bo = &bo->tbo; - entry->tv.num_shared = !bo->prime_shared_count; if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) list->gds_obj = bo; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a4b2ac541475..ce58af62d523 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -598,6 +598,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + amdgpu_bo_list_for_each_entry(e, p->bo_list) + e->tv.num_shared = 1; + amdgpu_bo_list_get_list(p->bo_list, &p->validated); if (p->bo_list->first_userptr != p->bo_list->num_entries) p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); @@ -717,8 +720,14 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, gws = p->bo_list->gws_obj; oa = p->bo_list->oa_obj; - amdgpu_bo_list_for_each_entry(e, p->bo_list) - e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo)); + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + /* Make sure we use the exclusive slot for shared BOs */ + if (bo->prime_shared_count) + e->tv.num_shared = 0; + e->bo_va = amdgpu_vm_bo_find(vm, bo); + } if (gds) { p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; From 0aa7aa24cc11720a05b4492345f0adba8373c226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 21 Sep 2018 18:09:59 +0200 Subject: [PATCH 36/60] drm/amdgpu: always reserve two slots for the VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And drop the now superflous extra reservations. 
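(For illustration only, not part of the patch: a minimal sketch of how a TTM caller is expected to use the num_shared field introduced earlier in this series. The helper name here is made up; the point is that the required number of shared fence slots is declared on the validate-list entry and ttm_eu_reserve_buffers() pre-allocates them, which is what makes the explicit reservation_object_reserve_shared() calls removed below superfluous.)

static int example_reserve_for_cs(struct ww_acquire_ctx *ticket,
				  struct list_head *list,
				  struct ttm_validate_buffer *entry,
				  struct ttm_buffer_object *bo)
{
	entry->bo = bo;
	/* one shared fence slot for the VM update, one for the CS job */
	entry->num_shared = 2;
	list_add(&entry->head, list);

	/*
	 * ttm_eu_reserve_buffers() now calls
	 * reservation_object_reserve_shared(bo->resv, entry->num_shared)
	 * for every entry, so no per-submission reservation is needed.
	 */
	return ttm_eu_reserve_buffers(ticket, list, true, NULL);
}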
Signed-off-by: Christian König Reviewed-by: Michel Dänzer Reviewed-by: Junwei Zhang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 ++++++--------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ce58af62d523..8cc640abe3de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -964,10 +964,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); - if (r) - return r; - p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo); if (amdgpu_vm_debug) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 39fb5d24cc51..b303ac79c2f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -617,7 +617,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, { entry->priority = 0; entry->tv.bo = &vm->root.base.bo->tbo; - entry->tv.num_shared = 1; + /* One for the VM updates and one for the CS job */ + entry->tv.num_shared = 2; entry->user_pages = NULL; list_add(&entry->tv.head, validated); } @@ -773,10 +774,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); - r = reservation_object_reserve_shared(bo->tbo.resv, 1); - if (r) - return r; - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) goto error; @@ -1842,10 +1839,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); - if (r) - goto error_free; - r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); if (r) goto error_free; @@ -3026,6 +3019,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_free_root; + r = reservation_object_reserve_shared(root->tbo.resv, 1); + if (r) + goto error_unreserve; + r = amdgpu_vm_clear_bo(adev, vm, root, adev->vm_manager.root_level, vm->pte_support_ats); From 07daa8a0784e3085c4c8e282a83dd8c92bf59b53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 24 Sep 2018 13:35:08 +0200 Subject: [PATCH 37/60] drm/amdgpu: always reserve one more shared slot for pipelined BO moves MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to drop the extra reserve in TTM. 
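(Summary only, not code from this patch: the net shared-slot accounting in amdgpu after patches 35-37 of this series, collected from the hunks above and below.)

	/* amdgpu_vm_get_pd_bo(): page directory BO */
	entry->tv.num_shared = 3;	/* VM updates + TTM move + CS job */

	/* amdgpu_cs_user_fence_chunk() / amdgpu_cs_parser_bos(): */
	p->uf_entry.tv.num_shared = 2;	/* TTM move + CS job */
	e->tv.num_shared = 2;

	/* exported (prime-shared) BOs go back to the exclusive slot */
	if (bo->prime_shared_count)
		e->tv.num_shared = 0;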
Signed-off-by: Christian König Reviewed-by: Michel Dänzer Reviewed-by: Junwei Zhang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8cc640abe3de..4a6a1d4a88ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -50,7 +50,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &bo->tbo; - p->uf_entry.tv.num_shared = 1; + /* One for TTM and one for the CS job */ + p->uf_entry.tv.num_shared = 2; p->uf_entry.user_pages = NULL; drm_gem_object_put_unlocked(gobj); @@ -598,8 +599,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + /* One for TTM and one for the CS job */ amdgpu_bo_list_for_each_entry(e, p->bo_list) - e->tv.num_shared = 1; + e->tv.num_shared = 2; amdgpu_bo_list_get_list(p->bo_list, &p->validated); if (p->bo_list->first_userptr != p->bo_list->num_entries) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b303ac79c2f6..b095bbbbc302 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -617,8 +617,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, { entry->priority = 0; entry->tv.bo = &vm->root.base.bo->tbo; - /* One for the VM updates and one for the CS job */ - entry->tv.num_shared = 2; + /* One for the VM updates, one for TTM and one for the CS job */ + entry->tv.num_shared = 3; entry->user_pages = NULL; list_add(&entry->tv.head, validated); } From d63cda5bfcd341b86c07beb5919107aae0d9ba99 Mon Sep 17 00:00:00 2001 From: Xiangliang Yu Date: Tue, 4 Dec 2018 15:13:28 +0800 Subject: [PATCH 38/60] drm/amdgpu/psp: Get psp fw version through reading register If PSP FW is running already, driver will not load PSP FW again and skip it. So psp fw version is not correct if reading it from FW binary file, need to get right version from register. Signed-off-by: Xiangliang Yu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 2b19616cb402..8e5e1d68e454 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -171,8 +171,10 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) * are already been loaded. */ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - if (sol_reg) + if (sol_reg) { + psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); return 0; + } /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), From 5ec996dfb6a19f3ea6d7ab9e74e9f32954af8466 Mon Sep 17 00:00:00 2001 From: Xiangliang Yu Date: Wed, 5 Dec 2018 11:23:43 +0800 Subject: [PATCH 39/60] drm/amdgpu/psp: Add support VMR ring for VF PSP only support VMR ring for SRIOV vf since v45 and all commands will be send to VMR ring for executing. VMR ring use C2PMSG 101 ~ 103 instead of C2PMSG 64 ~ 71. 
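(For reference, a summary of the C2PMSG register usage implemented by the psp_v11_0.c hunks below; this table is not part of the patch.)

/*
 *                         default ring (PF)    VMR ring (SRIOV VF)
 *  ring base, low addr    C2PMSG_69            C2PMSG_102
 *  ring base, high addr   C2PMSG_70            C2PMSG_103
 *  ring size              C2PMSG_71            not programmed
 *  control / status       C2PMSG_64            C2PMSG_101
 *  write pointer          C2PMSG_67            C2PMSG_102
 *
 * The VMR path is selected by psp_support_vmr_ring(), i.e. only for
 * SRIOV VFs whose SOS firmware version is newer than 0x80045.
 */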
Signed-off-by: Xiangliang Yu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 18 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 + drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 5 +- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 88 ++++++++++++++++++------- 4 files changed, 83 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e05dc66b1090..3142f844fd32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -155,10 +155,22 @@ psp_cmd_submit_buf(struct psp_context *psp, return ret; } -static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd, +bool psp_support_vmr_ring(struct psp_context *psp) +{ + if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045) + return true; + else + return false; +} + +static void psp_prep_tmr_cmd_buf(struct psp_context *psp, + struct psp_gfx_cmd_resp *cmd, uint64_t tmr_mc, uint32_t size) { - cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; + if (psp_support_vmr_ring(psp)) + cmd->cmd_id = GFX_CMD_ID_SETUP_VMR; + else + cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_size = size; @@ -192,7 +204,7 @@ static int psp_tmr_load(struct psp_context *psp) if (!cmd) return -ENOMEM; - psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, PSP_TMR_SIZE); + psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, PSP_TMR_SIZE); DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n", PSP_TMR_SIZE, psp->tmr_mc_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 9ec5d1a666a6..10decf70c9aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -217,6 +217,7 @@ extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; int psp_gpu_reset(struct amdgpu_device *adev); int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); +bool psp_support_vmr_ring(struct psp_context *psp); extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 882bd83a28c4..0de00fbe9233 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -43,6 +43,8 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ + GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */ + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */ GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ }; @@ -89,7 +91,8 @@ enum psp_gfx_cmd_id GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */ GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ - + GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ + GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 8e5e1d68e454..6c9a1b748ca7 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -173,6 +173,7 @@ static int 
psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); if (sol_reg) { psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + printk("sos fw version = 0x%x.\n", psp->sos_fw_version); return 0; } @@ -298,26 +299,47 @@ static int psp_v11_0_ring_create(struct psp_context *psp, struct psp_ring *ring = &psp->km_ring; struct amdgpu_device *adev = psp->adev; - /* Write low address of the ring to C2PMSG_69 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); - /* Write high address of the ring to C2PMSG_70 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); - /* Write size of ring to C2PMSG_71 */ - psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); - /* Write the ring initialization command to C2PMSG_64 */ - psp_ring_reg = ring_type; - psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + if (psp_support_vmr_ring(psp)) { + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } return ret; } @@ -328,15 +350,24 @@ static int psp_v11_0_ring_stop(struct psp_context *psp, int ret = 0; struct amdgpu_device *adev = psp->adev; - /* Write the ring destroy command to C2PMSG_64 */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS); + /* Write the ring destroy command*/ + if (psp_support_vmr_ring(psp)) + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); /* there might be handshake issue with hardware 
which needs delay */ mdelay(20); - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + /* Wait for response flag (bit 31) */ + if (psp_support_vmr_ring(psp)) + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); return ret; } @@ -375,7 +406,10 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp, uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; /* KM (GPCOM) prepare write pointer */ - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + if (psp_support_vmr_ring(psp)) + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); /* Update KM RB frame pointer to new frame */ /* write_frame ptr increments by size of rb_frame in bytes */ @@ -404,7 +438,11 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp, /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + if (psp_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); return 0; } From 53450efd6eb9e3155e16c14ce2ece387fc2d65e7 Mon Sep 17 00:00:00 2001 From: Xiangliang Yu Date: Wed, 5 Dec 2018 14:36:33 +0800 Subject: [PATCH 40/60] drm/amdgpu/psp: Destroy psp ring when doing gpu reset The PSP ring needs to be destroyed before starting reinit for a VF. This patch moves that step from the hypervisor driver into the guest. Signed-off-by: Xiangliang Yu Signed-off-by: Frank Min Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 3142f844fd32..6759d898b3ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -548,8 +548,10 @@ static int psp_load_fw(struct amdgpu_device *adev) int ret; struct psp_context *psp = &adev->psp; - if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset != 0) + if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) { + psp_ring_destroy(psp, PSP_RING_TYPE__KM); goto skip_memalloc; + } psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!psp->cmd) From 49ebca798696a6a5da02a562cd1fd0e645264511 Mon Sep 17 00:00:00 2001 From: wentalou Date: Thu, 6 Dec 2018 10:29:52 +0800 Subject: [PATCH 41/60] drm/amdgpu: Skip ring soft recovery when fence was NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_ring_soft_recovery would trigger a call trace when s_fence->parent was NULL inside amdgpu_job_timedout. Check the fence first, as drm_sched_hw_job_reset does.
Signed-off-by: Wentao Lou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 5b75bdc8dc28..335a0edf114b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -397,7 +397,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { ktime_t deadline = ktime_add_us(ktime_get(), 10000); - if (!ring->funcs->soft_recovery) + if (!ring->funcs->soft_recovery || !fence) return false; atomic_inc(&ring->adev->gpu_reset_counter); From 9ed9203c3ee7acd3f6e2acb3f1f691366847c8b5 Mon Sep 17 00:00:00 2001 From: hersen wu Date: Wed, 28 Nov 2018 16:55:47 -0500 Subject: [PATCH 42/60] drm/amd/powerplay: rv dal-pplib interface refactor powerplay part [WHY] clarify dal input parameters to pplib interface, remove un-used parameters. dal knows exactly which parameters needed and their effects at pplib and smu sides. current dal sequence for dcn1_update_clock to pplib: 1.smu10_display_clock_voltage_request for dcefclk 2.smu10_display_clock_voltage_request for fclk 3.phm_store_dal_configuration_data { set_min_deep_sleep_dcfclk set_active_display_count store_cc6_data --- this data never be referenced new sequence will be: 1. set_display_count --- need add new pplib interface 2. set_min_deep_sleep_dcfclk -- new pplib interface 3. set_hard_min_dcfclk_by_freq 4. set_hard_min_fclk_by_freq after this code refactor, smu10_display_clock_voltage_request, phm_store_dal_configuration_data will not be needed for rv. [HOW] step 1: add new functions at pplib interface step 2: add new functions at amdgpu dm and dc Signed-off-by: hersen wu Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher --- .../gpu/drm/amd/include/kgd_pp_interface.h | 4 + drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 82 ++++++++++++++++++- .../drm/amd/powerplay/hwmgr/hardwaremanager.c | 45 +++++++++- .../gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 36 +++++++- .../drm/amd/powerplay/inc/hardwaremanager.h | 3 + drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 4 +- 6 files changed, 165 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 980e696989b1..1479ea1dc3e7 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -276,6 +276,10 @@ struct amd_pm_funcs { struct amd_pp_simple_clock_info *clocks); int (*notify_smu_enable_pwe)(void *handle); int (*enable_mgpu_fan_boost)(void *handle); + int (*set_active_display_count)(void *handle, uint32_t count); + int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock); + int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock); + int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock); }; #endif diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index b68c2e0fef01..9bc27f468d5b 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -725,7 +725,7 @@ static int pp_dpm_force_clock_level(void *handle, } if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) { - pr_info("force clock level is for dpm manual mode only.\n"); + pr_debug("force clock level is for dpm manual mode only.\n"); return -EINVAL; } @@ -899,7 +899,7 @@ static int pp_set_power_profile_mode(void *handle, long 
*input, uint32_t size) } if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) { - pr_info("power profile setting is for manual dpm mode only.\n"); + pr_debug("power profile setting is for manual dpm mode only.\n"); return ret; } @@ -1072,7 +1072,7 @@ static int pp_get_current_clocks(void *handle, &hw_clocks, PHM_PerformanceLevelDesignation_Activity); if (ret) { - pr_info("Error in phm_get_clock_info \n"); + pr_debug("Error in phm_get_clock_info \n"); mutex_unlock(&hwmgr->smu_lock); return -EINVAL; } @@ -1332,6 +1332,78 @@ static int pp_enable_mgpu_fan_boost(void *handle) return 0; } +static int pp_set_min_deep_sleep_dcefclk(void *handle, uint32_t clock) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; + + if (hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk == NULL) { + pr_debug("%s was not implemented.\n", __func__); + return -EINVAL;; + } + + mutex_lock(&hwmgr->smu_lock); + hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk(hwmgr, clock); + mutex_unlock(&hwmgr->smu_lock); + + return 0; +} + +static int pp_set_hard_min_dcefclk_by_freq(void *handle, uint32_t clock) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; + + if (hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq == NULL) { + pr_debug("%s was not implemented.\n", __func__); + return -EINVAL;; + } + + mutex_lock(&hwmgr->smu_lock); + hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq(hwmgr, clock); + mutex_unlock(&hwmgr->smu_lock); + + return 0; +} + +static int pp_set_hard_min_fclk_by_freq(void *handle, uint32_t clock) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; + + if (hwmgr->hwmgr_func->set_hard_min_fclk_by_freq == NULL) { + pr_debug("%s was not implemented.\n", __func__); + return -EINVAL;; + } + + mutex_lock(&hwmgr->smu_lock); + hwmgr->hwmgr_func->set_hard_min_fclk_by_freq(hwmgr, clock); + mutex_unlock(&hwmgr->smu_lock); + + return 0; +} + +static int pp_set_active_display_count(void *handle, uint32_t count) +{ + struct pp_hwmgr *hwmgr = handle; + int ret = 0; + + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; + + mutex_lock(&hwmgr->smu_lock); + ret = phm_set_active_display_count(hwmgr, count); + mutex_unlock(&hwmgr->smu_lock); + + return ret; +} + static const struct amd_pm_funcs pp_dpm_funcs = { .load_firmware = pp_dpm_load_fw, .wait_for_fw_loading_complete = pp_dpm_fw_loading_complete, @@ -1378,4 +1450,8 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .get_display_mode_validation_clocks = pp_get_display_mode_validation_clocks, .notify_smu_enable_pwe = pp_notify_smu_enable_pwe, .enable_mgpu_fan_boost = pp_enable_mgpu_fan_boost, + .set_active_display_count = pp_set_active_display_count, + .set_min_deep_sleep_dcefclk = pp_set_min_deep_sleep_dcefclk, + .set_hard_min_dcefclk_by_freq = pp_set_hard_min_dcefclk_by_freq, + .set_hard_min_fclk_by_freq = pp_set_hard_min_fclk_by_freq, }; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index 85119c2bdcc8..333b9b845971 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -286,8 +286,8 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr, if (display_config == NULL) return -EINVAL; - if (NULL != hwmgr->hwmgr_func->set_deep_sleep_dcefclk) - hwmgr->hwmgr_func->set_deep_sleep_dcefclk(hwmgr, display_config->min_dcef_deep_sleep_set_clk); + if (NULL != hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk) + 
hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk(hwmgr, display_config->min_dcef_deep_sleep_set_clk); for (index = 0; index < display_config->num_path_including_non_display; index++) { if (display_config->displays[index].controller_id != 0) @@ -478,3 +478,44 @@ int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr) return hwmgr->hwmgr_func->disable_smc_firmware_ctf(hwmgr); } + +int phm_set_active_display_count(struct pp_hwmgr *hwmgr, uint32_t count) +{ + PHM_FUNC_CHECK(hwmgr); + + if (!hwmgr->hwmgr_func->set_active_display_count) + return -EINVAL; + + return hwmgr->hwmgr_func->set_active_display_count(hwmgr, count); +} + +int phm_set_min_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock) +{ + PHM_FUNC_CHECK(hwmgr); + + if (!hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk) + return -EINVAL; + + return hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk(hwmgr, clock); +} + +int phm_set_hard_min_dcefclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock) +{ + PHM_FUNC_CHECK(hwmgr); + + if (!hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq) + return -EINVAL; + + return hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq(hwmgr, clock); +} + +int phm_set_hard_min_fclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock) +{ + PHM_FUNC_CHECK(hwmgr); + + if (!hwmgr->hwmgr_func->set_hard_min_fclk_by_freq) + return -EINVAL; + + return hwmgr->hwmgr_func->set_hard_min_fclk_by_freq(hwmgr, clock); +} + diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index dd18cb710391..f95c5f50eb0f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -216,12 +216,12 @@ static inline uint32_t convert_10k_to_mhz(uint32_t clock) return (clock + 99) / 100; } -static int smu10_set_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock) +static int smu10_set_min_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock) { struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); if (smu10_data->need_min_deep_sleep_dcefclk && - smu10_data->deep_sleep_dcefclk != convert_10k_to_mhz(clock)) { + smu10_data->deep_sleep_dcefclk != convert_10k_to_mhz(clock)) { smu10_data->deep_sleep_dcefclk = convert_10k_to_mhz(clock); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetMinDeepSleepDcefclk, @@ -230,6 +230,34 @@ static int smu10_set_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock) return 0; } +static int smu10_set_hard_min_dcefclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock) +{ + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + + if (smu10_data->dcf_actual_hard_min_freq && + smu10_data->dcf_actual_hard_min_freq != convert_10k_to_mhz(clock)) { + smu10_data->dcf_actual_hard_min_freq = convert_10k_to_mhz(clock); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinDcefclkByFreq, + smu10_data->dcf_actual_hard_min_freq); + } + return 0; +} + +static int smu10_set_hard_min_fclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock) +{ + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + + if (smu10_data->f_actual_hard_min_freq && + smu10_data->f_actual_hard_min_freq != convert_10k_to_mhz(clock)) { + smu10_data->f_actual_hard_min_freq = convert_10k_to_mhz(clock); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinFclkByFreq, + smu10_data->f_actual_hard_min_freq); + } + return 0; +} + static int smu10_set_active_display_count(struct pp_hwmgr *hwmgr, uint32_t count) { struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr 
*)(hwmgr->backend); @@ -1206,7 +1234,7 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .get_max_high_clocks = smu10_get_max_high_clocks, .read_sensor = smu10_read_sensor, .set_active_display_count = smu10_set_active_display_count, - .set_deep_sleep_dcefclk = smu10_set_deep_sleep_dcefclk, + .set_min_deep_sleep_dcefclk = smu10_set_min_deep_sleep_dcefclk, .dynamic_state_management_enable = smu10_enable_dpm_tasks, .power_off_asic = smu10_power_off_asic, .asic_setup = smu10_setup_asic_task, @@ -1217,6 +1245,8 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .display_clock_voltage_request = smu10_display_clock_voltage_request, .powergate_gfx = smu10_gfx_off_control, .powergate_sdma = smu10_powergate_sdma, + .set_hard_min_dcefclk_by_freq = smu10_set_hard_min_dcefclk_by_freq, + .set_hard_min_fclk_by_freq = smu10_set_hard_min_fclk_by_freq, }; int smu10_init_function_pointers(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h index 54fd0125d9cf..f4dab979a3a1 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h @@ -463,5 +463,8 @@ extern int phm_display_clock_voltage_request(struct pp_hwmgr *hwmgr, extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks); extern int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr); + +extern int phm_set_active_display_count(struct pp_hwmgr *hwmgr, uint32_t count); + #endif /* _HARDWARE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index fb0f96f7cdbc..0d298a0409f5 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -309,7 +309,7 @@ struct pp_hwmgr_func { int (*avfs_control)(struct pp_hwmgr *hwmgr, bool enable); int (*disable_smc_firmware_ctf)(struct pp_hwmgr *hwmgr); int (*set_active_display_count)(struct pp_hwmgr *hwmgr, uint32_t count); - int (*set_deep_sleep_dcefclk)(struct pp_hwmgr *hwmgr, uint32_t clock); + int (*set_min_deep_sleep_dcefclk)(struct pp_hwmgr *hwmgr, uint32_t clock); int (*start_thermal_controller)(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *range); int (*notify_cac_buffer_info)(struct pp_hwmgr *hwmgr, uint32_t virtual_addr_low, @@ -332,6 +332,8 @@ struct pp_hwmgr_func { int (*smus_notify_pwe)(struct pp_hwmgr *hwmgr); int (*powergate_sdma)(struct pp_hwmgr *hwmgr, bool bgate); int (*enable_mgpu_fan_boost)(struct pp_hwmgr *hwmgr); + int (*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); + int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); }; struct pp_table_func { From 5634e38cda7ba47f9e0f372cf1e1729a845d3348 Mon Sep 17 00:00:00 2001 From: "Kuehling, Felix" Date: Fri, 7 Dec 2018 22:07:20 +0000 Subject: [PATCH 43/60] drm/amdgpu: Workaround build failure due to trace conflict Avoid including mmu_context.h in amdgpu_amdkfd.h since that may be included in other header files that define traces. This leads to conflicts due to traces defined in other headers included via mmu_context.h. 
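(The fix itself is mechanical. The commit message names the header as mmu_context.h; assuming that is <linux/mmu_context.h>, and leaving the surrounding include lists aside, the change amounts to:)

	/* amdgpu_amdkfd.h: drop the heavyweight include */
	-#include <linux/mmu_context.h>

	/* amdgpu_amdkfd_gfx_v7.c, _v8.c, _v9.c: include it where it is used */
	+#include <linux/mmu_context.h>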
Acked-by: Alex Deucher Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 1 + 4 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index bcf587b4ba98..86cf1a432ad4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -27,7 +27,6 @@ #include #include -#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 72a357dae070..ff7fac7df34b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "amdgpu.h" #include "amdgpu_amdkfd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 0e2a56b6a9b6..56ea929f524b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "amdgpu.h" #include "amdgpu_amdkfd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 03b604c96d94..5c51d4910650 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "amdgpu.h" #include "amdgpu_amdkfd.h" From 611736d8447c0c48a172db0b968dddae60696a72 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 19 Nov 2018 20:05:54 -0500 Subject: [PATCH 44/60] drm/amdgpu: Add KFD VRAM limit checking We don't want KFD processes evicting each other over VRAM usage. Therefore prevent overcommitting VRAM among KFD applications with a per-GPU limit. Also leave enough room for page tables on top of the application memory usage. 
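(Worked example with a hypothetical configuration; the formula is the one used by amdgpu_amdkfd_reserve_mem_limit() in the hunks below.)

/*
 * amdgpu_amdkfd_total_mem_size = system RAM + VRAM of all probed GPUs,
 * and 1/512 of that total (>> 9) is held back for page tables:
 *
 *   64 GiB system RAM + 2 x 16 GiB VRAM  ->  total = 96 GiB
 *   reserved_for_pt = 96 GiB >> 9        ->  192 MiB
 *
 * A VRAM allocation of 'size' bytes on a GPU is then refused with
 * -ENOMEM once
 *
 *   adev->kfd.vram_used + size >
 *           adev->gmc.real_vram_size - reserved_for_pt
 *
 * i.e. KFD processes on that example GPU can use at most
 * 16 GiB - 192 MiB of its VRAM, independent of TTM eviction.
 */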
Acked-by: Alex Deucher Signed-off-by: Felix Kuehling Reviewed-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 52 ++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 8 ++- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 60 +++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- 5 files changed, 75 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6fc023bae7fe..5752024aa572 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -82,6 +82,7 @@ #include "amdgpu_bo_list.h" #include "amdgpu_gem.h" #include "amdgpu_doorbell.h" +#include "amdgpu_amdkfd.h" #define MAX_GPU_INSTANCE 16 @@ -862,6 +863,9 @@ struct amdgpu_device { /* GDS */ struct amdgpu_gds gds; + /* KFD */ + struct amdgpu_kfd_dev kfd; + /* display related functionality */ struct amdgpu_display_manager dm; @@ -875,9 +879,6 @@ struct amdgpu_device { atomic64_t visible_pin_size; atomic64_t gart_pin_size; - /* amdkfd interface */ - struct kfd_dev *kfd; - /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index b12dd4fa37a1..68b29a210eaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -31,10 +31,20 @@ const struct kgd2kfd_calls *kgd2kfd; static const unsigned int compute_vmid_bitmap = 0xFF00; +/* Total memory size in system memory and all GPU VRAM. Used to + * estimate worst case amount of memory to reserve for page tables + */ +uint64_t amdgpu_amdkfd_total_mem_size; + int amdgpu_amdkfd_init(void) { + struct sysinfo si; int ret; + si_meminfo(&si); + amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh; + amdgpu_amdkfd_total_mem_size *= si.mem_unit; + #ifdef CONFIG_HSA_AMD ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); if (ret) @@ -87,8 +97,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) return; } - adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, - adev->pdev, kfd2kgd); + adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev, + adev->pdev, kfd2kgd); + + if (adev->kfd.dev) + amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; } /** @@ -128,7 +141,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i, n; int last_valid_bit; - if (adev->kfd) { + + if (adev->kfd.dev) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = compute_vmid_bitmap, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, @@ -167,7 +181,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_start_offset); if (adev->asic_type < CHIP_VEGA10) { - kgd2kfd->device_init(adev->kfd, &gpu_resources); + kgd2kfd->device_init(adev->kfd.dev, &gpu_resources); return; } @@ -196,37 +210,37 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) gpu_resources.reserved_doorbell_mask = 0x1e0; gpu_resources.reserved_doorbell_val = 0x0e0; - kgd2kfd->device_init(adev->kfd, &gpu_resources); + kgd2kfd->device_init(adev->kfd.dev, &gpu_resources); } } void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) { - if (adev->kfd) { - kgd2kfd->device_exit(adev->kfd); - adev->kfd = NULL; + if (adev->kfd.dev) { + kgd2kfd->device_exit(adev->kfd.dev); + adev->kfd.dev = NULL; } } void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void 
*ih_ring_entry) { - if (adev->kfd) - kgd2kfd->interrupt(adev->kfd, ih_ring_entry); + if (adev->kfd.dev) + kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry); } void amdgpu_amdkfd_suspend(struct amdgpu_device *adev) { - if (adev->kfd) - kgd2kfd->suspend(adev->kfd); + if (adev->kfd.dev) + kgd2kfd->suspend(adev->kfd.dev); } int amdgpu_amdkfd_resume(struct amdgpu_device *adev) { int r = 0; - if (adev->kfd) - r = kgd2kfd->resume(adev->kfd); + if (adev->kfd.dev) + r = kgd2kfd->resume(adev->kfd.dev); return r; } @@ -235,8 +249,8 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) { int r = 0; - if (adev->kfd) - r = kgd2kfd->pre_reset(adev->kfd); + if (adev->kfd.dev) + r = kgd2kfd->pre_reset(adev->kfd.dev); return r; } @@ -245,8 +259,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) { int r = 0; - if (adev->kfd) - r = kgd2kfd->post_reset(adev->kfd); + if (adev->kfd.dev) + r = kgd2kfd->post_reset(adev->kfd.dev); return r; } @@ -498,7 +512,7 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) { - if (adev->kfd) { + if (adev->kfd.dev) { if ((1 << vmid) & compute_vmid_bitmap) return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 86cf1a432ad4..131c6e5e6f10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -34,6 +34,7 @@ #include "amdgpu_vm.h" extern const struct kgd2kfd_calls *kgd2kfd; +extern uint64_t amdgpu_amdkfd_total_mem_size; struct amdgpu_device; @@ -76,6 +77,11 @@ struct amdgpu_amdkfd_fence { char timeline_name[TASK_COMM_LEN]; }; +struct amdgpu_kfd_dev { + struct kfd_dev *dev; + uint64_t vram_used; +}; + struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm); bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); @@ -195,6 +201,6 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); void amdgpu_amdkfd_gpuvm_init_mem_limits(void); -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index b29ef088fa14..5fb60e1d713a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -110,17 +110,17 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) (kfd_mem_limit.max_ttm_mem_limit >> 20)); } -static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, +static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 domain, bool sg) { - size_t acc_size, system_mem_needed, ttm_mem_needed; + size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; + uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9; int ret = 0; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, sizeof(struct amdgpu_bo)); - spin_lock(&kfd_mem_limit.mem_limit_lock); - + vram_needed = 0; if (domain == AMDGPU_GEM_DOMAIN_GTT) { /* TTM GTT memory */ system_mem_needed = acc_size + size; @@ -133,23 +133,30 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, /* VRAM and SG */ system_mem_needed = acc_size; ttm_mem_needed = acc_size; + if (domain == AMDGPU_GEM_DOMAIN_VRAM) + vram_needed = size; } + spin_lock(&kfd_mem_limit.mem_limit_lock); + if 
((kfd_mem_limit.system_mem_used + system_mem_needed > - kfd_mem_limit.max_system_mem_limit) || - (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > - kfd_mem_limit.max_ttm_mem_limit)) + kfd_mem_limit.max_system_mem_limit) || + (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > + kfd_mem_limit.max_ttm_mem_limit) || + (adev->kfd.vram_used + vram_needed > + adev->gmc.real_vram_size - reserved_for_pt)) { ret = -ENOMEM; - else { + } else { kfd_mem_limit.system_mem_used += system_mem_needed; kfd_mem_limit.ttm_mem_used += ttm_mem_needed; + adev->kfd.vram_used += vram_needed; } spin_unlock(&kfd_mem_limit.mem_limit_lock); return ret; } -static void unreserve_system_mem_limit(struct amdgpu_device *adev, +static void unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 domain, bool sg) { size_t acc_size; @@ -167,6 +174,11 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, } else { kfd_mem_limit.system_mem_used -= acc_size; kfd_mem_limit.ttm_mem_used -= acc_size; + if (domain == AMDGPU_GEM_DOMAIN_VRAM) { + adev->kfd.vram_used -= size; + WARN_ONCE(adev->kfd.vram_used < 0, + "kfd VRAM memory accounting unbalanced"); + } } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); @@ -176,29 +188,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, spin_unlock(&kfd_mem_limit.mem_limit_lock); } -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) { - spin_lock(&kfd_mem_limit.mem_limit_lock); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + u32 domain = bo->preferred_domains; + bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { - kfd_mem_limit.system_mem_used -= - (bo->tbo.acc_size + amdgpu_bo_size(bo)); - kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size; - } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { - kfd_mem_limit.system_mem_used -= - (bo->tbo.acc_size + amdgpu_bo_size(bo)); - kfd_mem_limit.ttm_mem_used -= - (bo->tbo.acc_size + amdgpu_bo_size(bo)); - } else { - kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; - kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size; + domain = AMDGPU_GEM_DOMAIN_CPU; + sg = false; } - WARN_ONCE(kfd_mem_limit.system_mem_used < 0, - "kfd system memory accounting unbalanced"); - WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, - "kfd TTM memory accounting unbalanced"); - spin_unlock(&kfd_mem_limit.mem_limit_lock); + unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg); } @@ -1235,8 +1236,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( amdgpu_sync_create(&(*mem)->sync); - ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, - alloc_domain, false); + ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, false); if (ret) { pr_debug("Insufficient system memory\n"); goto err_reserve_limit; @@ -1289,7 +1289,7 @@ allocate_init_user_pages_failed: /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: - unreserve_system_mem_limit(adev, size, alloc_domain, false); + unreserve_mem_limit(adev, size, alloc_domain, false); err_reserve_limit: mutex_destroy(&(*mem)->lock); kfree(*mem); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index cc50cb65c212..fd271f9746a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -81,7 +81,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) 
amdgpu_bo_subtract_pin_size(bo); if (bo->kfd_bo) - amdgpu_amdkfd_unreserve_system_memory_limit(bo); + amdgpu_amdkfd_unreserve_memory_limit(bo); amdgpu_bo_kunmap(bo); From 3704d56e1a64bb0e951815f91149ae7bb726aa76 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 20 Nov 2018 20:52:37 -0500 Subject: [PATCH 45/60] drm/amdkfd: Add NULL-pointer check top_dev->gpu is NULL for CPUs. Avoid dereferencing it if NULL. Acked-by: Alex Deucher Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index aa793fcbbdcc..c5ed21ef2462 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -101,7 +101,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu->pdev == pdev) { + if (top_dev->gpu && top_dev->gpu->pdev == pdev) { device = top_dev->gpu; break; } From 1dde0ea95b782425b95455d487cb44991525a1d1 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 20 Nov 2018 21:00:29 -0500 Subject: [PATCH 46/60] drm/amdkfd: Add DMABuf import functionality This is used for interoperability between ROCm compute and graphics APIs. It allows importing graphics driver BOs into the ROCm SVM address space for zero-copy GPU access. The API is split into two steps (query and import) to allow user mode to manage the virtual address space allocation for the imported buffer. Acked-by: Alex Deucher Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 57 +++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 11 ++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 55 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 118 +++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 18 +++ include/uapi/linux/kfd_ioctl.h | 26 +++- 9 files changed, 287 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 68b29a210eaa..68e4cf1b655c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" #include +#include const struct kgd2kfd_calls *kgd2kfd; @@ -433,6 +434,62 @@ void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) cu_info->lds_size = acu_info.lds_size; } +int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + struct kgd_dev **dma_buf_kgd, + uint64_t *bo_size, void *metadata_buffer, + size_t buffer_size, uint32_t *metadata_size, + uint32_t *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct dma_buf *dma_buf; + struct drm_gem_object *obj; + struct amdgpu_bo *bo; + uint64_t metadata_flags; + int r = -EINVAL; + + dma_buf = dma_buf_get(dma_buf_fd); + if (IS_ERR(dma_buf)) + return PTR_ERR(dma_buf); + + if (dma_buf->ops != &amdgpu_dmabuf_ops) + /* Can't handle non-graphics buffers */ + goto out_put; + + obj = dma_buf->priv; + if (obj->dev->driver != adev->ddev->driver) + /* Can't handle buffers from different drivers */ + goto out_put; + + adev = obj->dev->dev_private; + bo = gem_to_amdgpu_bo(obj); + if 
(!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT))) + /* Only VRAM and GTT BOs are supported */ + goto out_put; + + r = 0; + if (dma_buf_kgd) + *dma_buf_kgd = (struct kgd_dev *)adev; + if (bo_size) + *bo_size = amdgpu_bo_size(bo); + if (metadata_size) + *metadata_size = bo->metadata_size; + if (metadata_buffer) + r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, + metadata_size, &metadata_flags); + if (flags) { + *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; + + if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) + *flags |= ALLOC_MEM_FLAGS_PUBLIC; + } + +out_put: + dma_buf_put(dma_buf); + return r; +} + uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 131c6e5e6f10..70429f7aa9a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -149,6 +149,11 @@ uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd); uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd); void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); +int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + struct kgd_dev **dmabuf_kgd, + uint64_t *bo_size, void *metadata_buffer, + size_t buffer_size, uint32_t *metadata_size, + uint32_t *flags); uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); @@ -200,6 +205,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + struct dma_buf *dmabuf, + uint64_t va, void *vm, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset); + void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 5fb60e1d713a..a0a500d45886 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -1664,6 +1665,60 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, return 0; } +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + struct dma_buf *dma_buf, + uint64_t va, void *vm, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct drm_gem_object *obj; + struct amdgpu_bo *bo; + struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + + if (dma_buf->ops != &amdgpu_dmabuf_ops) + /* Can't handle non-graphics buffers */ + return -EINVAL; + + obj = dma_buf->priv; + if (obj->dev->dev_private != adev) + /* Can't handle buffers from other devices */ + return -EINVAL; + + bo = gem_to_amdgpu_bo(obj); + if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT))) + /* Only VRAM and GTT BOs are supported */ + return -EINVAL; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -ENOMEM; + + if (size) + *size = amdgpu_bo_size(bo); + + if (mmap_offset) + *mmap_offset = amdgpu_bo_mmap_offset(bo); + + 
INIT_LIST_HEAD(&(*mem)->bo_va_list); + mutex_init(&(*mem)->lock); + (*mem)->mapping_flags = + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC; + + (*mem)->bo = amdgpu_bo_ref(bo); + (*mem)->va = va; + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + (*mem)->mapped_to_gpu_memory = 0; + (*mem)->process_info = avm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); + amdgpu_sync_create(&(*mem)->sync); + + return 0; +} + /* Evict a userptr BO by stopping the queues if necessary * * Runs in MMU notifier, may be in RECLAIM_FS context. This means it diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index d63daba9b17c..f1ddfc50bcc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -54,6 +54,8 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +extern const struct dma_buf_ops amdgpu_dmabuf_ops; + /* * GEM objects. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 3e44d889f7af..71913a18d142 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -39,8 +39,6 @@ #include #include -static const struct dma_buf_ops amdgpu_dmabuf_ops; - /** * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table * implementation @@ -332,7 +330,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, return ret; } -static const struct dma_buf_ops amdgpu_dmabuf_ops = { +const struct dma_buf_ops amdgpu_dmabuf_ops = { .attach = amdgpu_gem_map_attach, .detach = amdgpu_gem_map_detach, .map_dma_buf = drm_gem_map_dma_buf, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 5f4062b41add..ae3ae0fb2602 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" @@ -1550,6 +1551,115 @@ copy_from_user_failed: return err; } +static int kfd_ioctl_get_dmabuf_info(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_dmabuf_info_args *args = data; + struct kfd_dev *dev = NULL; + struct kgd_dev *dma_buf_kgd; + void *metadata_buffer = NULL; + uint32_t flags; + unsigned int i; + int r; + + /* Find a KFD GPU device that supports the get_dmabuf_info query */ + for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++) + if (dev) + break; + if (!dev) + return -EINVAL; + + if (args->metadata_ptr) { + metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL); + if (!metadata_buffer) + return -ENOMEM; + } + + /* Get dmabuf info from KGD */ + r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd, + &dma_buf_kgd, &args->size, + metadata_buffer, args->metadata_size, + &args->metadata_size, &flags); + if (r) + goto exit; + + /* Reverse-lookup gpu_id from kgd pointer */ + dev = kfd_device_by_kgd(dma_buf_kgd); + if (!dev) { + r = -EINVAL; + goto exit; + } + args->gpu_id = dev->id; + args->flags = flags; + + /* Copy metadata buffer to user mode */ + if (metadata_buffer) { + r = copy_to_user((void __user *)args->metadata_ptr, + metadata_buffer, args->metadata_size); + if (r 
!= 0) + r = -EFAULT; + } + +exit: + kfree(metadata_buffer); + + return r; +} + +static int kfd_ioctl_import_dmabuf(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_import_dmabuf_args *args = data; + struct kfd_process_device *pdd; + struct dma_buf *dmabuf; + struct kfd_dev *dev; + int idr_handle; + uint64_t size; + void *mem; + int r; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + dmabuf = dma_buf_get(args->dmabuf_fd); + if (!dmabuf) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + r = PTR_ERR(pdd); + goto err_unlock; + } + + r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf, + args->va_addr, pdd->vm, + (struct kgd_mem **)&mem, &size, + NULL); + if (r) + goto err_unlock; + + idr_handle = kfd_process_device_create_obj_handle(pdd, mem); + if (idr_handle < 0) { + r = -EFAULT; + goto err_free; + } + + mutex_unlock(&p->mutex); + + args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); + + return 0; + +err_free: + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); +err_unlock: + mutex_unlock(&p->mutex); + return r; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -1635,7 +1745,13 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { kfd_ioctl_set_cu_mask, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE, - kfd_ioctl_get_queue_wave_state, 0) + kfd_ioctl_get_queue_wave_state, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO, + kfd_ioctl_get_dmabuf_info, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, + kfd_ioctl_import_dmabuf, 0), }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index dec8e64f36bd..0689d4ccbbc0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -793,6 +793,7 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain( struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); +struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd); int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); int kfd_numa_node_to_apic_id(int numa_node_id); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index c5ed21ef2462..5f5b2acedbac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -111,6 +111,24 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) return device; } +struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd) +{ + struct kfd_topology_device *top_dev; + struct kfd_dev *device = NULL; + + down_read(&topology_lock); + + list_for_each_entry(top_dev, &topology_device_list, list) + if (top_dev->gpu && top_dev->gpu->kgd == kgd) { + device = top_dev->gpu; + break; + } + + up_read(&topology_lock); + + return device; +} + /* Called with write topology_lock acquired */ static void kfd_release_topology_device(struct kfd_topology_device *dev) { diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index b01eb502d49c..e622fd1fbd46 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -398,6 +398,24 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { __u32 n_success; /* to/from KFD */ }; +struct 
kfd_ioctl_get_dmabuf_info_args { + __u64 size; /* from KFD */ + __u64 metadata_ptr; /* to KFD */ + __u32 metadata_size; /* to KFD (space allocated by user) + * from KFD (actual metadata size) + */ + __u32 gpu_id; /* from KFD */ + __u32 flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */ + __u32 dmabuf_fd; /* to KFD */ +}; + +struct kfd_ioctl_import_dmabuf_args { + __u64 va_addr; /* to KFD */ + __u64 handle; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 dmabuf_fd; /* to KFD */ +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -486,7 +504,13 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { #define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args) +#define AMDKFD_IOC_GET_DMABUF_INFO \ + AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args) + +#define AMDKFD_IOC_IMPORT_DMABUF \ + AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1C +#define AMDKFD_COMMAND_END 0x1E #endif From b408a548846f2343716351d55a6c9af9e73ec32c Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 20 Nov 2018 21:44:27 -0500 Subject: [PATCH 47/60] drm/amdkfd: Add support for doorbell BOs This allows user mode to map doorbell pages into GPUVM address space. That way GPUs can submit to user mode queues (self-dispatch). Acked-by: Alex Deucher Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 59 +++++++++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 ++ .../gpu/drm/amd/include/kgd_kfd_interface.h | 4 +- 3 files changed, 62 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a0a500d45886..be1ab43473c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -887,6 +887,24 @@ update_gpuvm_pte_failed: return ret; } +static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) +{ + struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); + + if (!sg) + return NULL; + if (sg_alloc_table(sg, 1, GFP_KERNEL)) { + kfree(sg); + return NULL; + } + sg->sgl->dma_address = addr; + sg->sgl->length = size; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->sgl->dma_length = size; +#endif + return sg; +} + static int process_validate_vms(struct amdkfd_process_info *process_info) { struct amdgpu_vm *peer_vm; @@ -1170,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + enum ttm_bo_type bo_type = ttm_bo_type_device; + struct sg_table *sg = NULL; uint64_t user_addr = 0; struct amdgpu_bo *bo; struct amdgpu_bo_param bp; @@ -1198,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (!offset || !*offset) return -EINVAL; user_addr = *offset; + } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_CPU; + bo_type = ttm_bo_type_sg; + alloc_flags = 0; + if (size > UINT_MAX) + return -EINVAL; + sg = create_doorbell_sg(*offset, size); + if (!sg) + return -ENOMEM; } else { return -EINVAL; } *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if (!*mem) - return -ENOMEM; + if (!*mem) { + ret = -ENOMEM; + goto err; + } INIT_LIST_HEAD(&(*mem)->bo_va_list); mutex_init(&(*mem)->lock); (*mem)->aql_queue = !!(flags & 
ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); @@ -1237,7 +1269,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( amdgpu_sync_create(&(*mem)->sync); - ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, false); + ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg); if (ret) { pr_debug("Insufficient system memory\n"); goto err_reserve_limit; @@ -1251,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( bp.byte_align = byte_align; bp.domain = alloc_domain; bp.flags = alloc_flags; - bp.type = ttm_bo_type_device; + bp.type = bo_type; bp.resv = NULL; ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) { @@ -1259,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( domain_string(alloc_domain), ret); goto err_bo_create; } + if (bo_type == ttm_bo_type_sg) { + bo->tbo.sg = sg; + bo->tbo.ttm->sg = sg; + } bo->kfd_bo = *mem; (*mem)->bo = bo; if (user_addr) @@ -1290,10 +1326,15 @@ allocate_init_user_pages_failed: /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: - unreserve_mem_limit(adev, size, alloc_domain, false); + unreserve_mem_limit(adev, size, alloc_domain, !!sg); err_reserve_limit: mutex_destroy(&(*mem)->lock); kfree(*mem); +err: + if (sg) { + sg_free_table(sg); + kfree(sg); + } return ret; } @@ -1363,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the sync object */ amdgpu_sync_free(&mem->sync); + /* If the SG is not NULL, it's one we created for a doorbell + * BO. We need to free it. + */ + if (mem->bo->tbo.sg) { + sg_free_table(mem->bo->tbo.sg); + kfree(mem->bo->tbo.sg); + } + /* Free the BO*/ amdgpu_bo_unref(&mem->bo); mutex_destroy(&mem->lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index ae3ae0fb2602..3623538baf6f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1274,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return -EINVAL; } + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { + if (args->size != kfd_doorbell_process_slice(dev)) + return -EINVAL; + offset = kfd_get_process_doorbells(dev, p); + } + mutex_lock(&p->mutex); pdd = kfd_bind_process_to_device(dev, p); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 58ac0b90c310..8154d67388cc 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -188,8 +188,8 @@ struct tile_config { */ #define ALLOC_MEM_FLAGS_VRAM (1 << 0) #define ALLOC_MEM_FLAGS_GTT (1 << 1) -#define ALLOC_MEM_FLAGS_USERPTR (1 << 2) /* TODO */ -#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) /* TODO */ +#define ALLOC_MEM_FLAGS_USERPTR (1 << 2) +#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) /* * Allocation flags attributes/access options. From 2026057736e9134c524ad85b526005944034e00f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 26 Sep 2018 11:08:32 +0200 Subject: [PATCH 48/60] drm/amdgpu: send IVs to the KFD only after processing them v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to filter out VM faults in the GMC code. 
v2: don't filter out all faults v3: fix copy&paste typo, send all IV to the KFD, don't change message level Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 38 +++++++++++-------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 6b6524f04ce0..79b6f456f2c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -149,9 +149,6 @@ static void amdgpu_irq_callback(struct amdgpu_device *adev, if (!amdgpu_ih_prescreen_iv(adev)) return; - /* Before dispatching irq to IP blocks, send it to amdkfd */ - amdgpu_amdkfd_interrupt(adev, (const void *) &ih->ring[ring_index]); - entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; amdgpu_ih_decode_iv(adev, &entry); @@ -371,39 +368,38 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, unsigned client_id = entry->client_id; unsigned src_id = entry->src_id; struct amdgpu_irq_src *src; + bool handled = false; int r; trace_amdgpu_iv(entry); if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) { DRM_DEBUG("Invalid client_id in IV: %d\n", client_id); - return; - } - if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { + } else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { DRM_DEBUG("Invalid src_id in IV: %d\n", src_id); - return; - } - if (adev->irq.virq[src_id]) { + } else if (adev->irq.virq[src_id]) { generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id)); - } else { - if (!adev->irq.client[client_id].sources) { - DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", - client_id, src_id); - return; - } - src = adev->irq.client[client_id].sources[src_id]; - if (!src) { - DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id); - return; - } + } else if (!adev->irq.client[client_id].sources) { + DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", + client_id, src_id); + } else if ((src = adev->irq.client[client_id].sources[src_id])) { r = src->funcs->process(adev, src, entry); - if (r) + if (r < 0) DRM_ERROR("error processing interrupt (%d)\n", r); + else if (r) + handled = true; + + } else { + DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id); } + + /* Send it to amdkfd as well if it isn't already handled */ + if (!handled) + amdgpu_amdkfd_interrupt(adev, entry->iv_entry); } /** From a655dad4b2f94e0d880f2e4ea45251092d836f36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 26 Sep 2018 11:15:36 +0200 Subject: [PATCH 49/60] drm/amdgpu: remove VM fault_credit handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit printk_ratelimit() is much better suited to limit the number of reported VM faults. 
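As a rough illustration (not taken from this series), rate-limited reporting along these lines replaces the per-VM credit bookkeeping; the helper name and message text below are hypothetical, only printk_ratelimit() and dev_err() are real kernel interfaces:

/* Hypothetical sketch: throttle noisy VM-fault prints with
 * printk_ratelimit() instead of tracking per-VM fault credits.
 */
static void report_vm_fault(struct amdgpu_device *adev,
			    unsigned int pasid, u64 addr)
{
	if (printk_ratelimit())
		dev_err(adev->dev,
			"VM fault from pasid %u at address 0x%016llx\n",
			pasid, addr);
}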
Signed-off-by: Christian König Reviewed-by: Felix Kuehling Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 37 ------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 ---- drivers/gpu/drm/amd/amdgpu/cik_ih.c | 18 +----------- drivers/gpu/drm/amd/amdgpu/cz_ih.c | 18 +----------- drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 18 +----------- drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 18 +----------- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 7 ++--- 7 files changed, 6 insertions(+), 115 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b095bbbbc302..fc91f3e54a87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3052,7 +3052,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } INIT_KFIFO(vm->faults); - vm->fault_credit = 16; return 0; @@ -3264,42 +3263,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vmid_free_reserved(adev, vm, i); } -/** - * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID - * - * @adev: amdgpu_device pointer - * @pasid: PASID do identify the VM - * - * This function is expected to be called in interrupt context. - * - * Returns: - * True if there was fault credit, false otherwise - */ -bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, - unsigned int pasid) -{ - struct amdgpu_vm *vm; - - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - if (!vm) { - /* VM not found, can't track fault credit */ - spin_unlock(&adev->vm_manager.pasid_lock); - return true; - } - - /* No lock needed. only accessed by IRQ handler */ - if (!vm->fault_credit) { - /* Too many faults in this VM */ - spin_unlock(&adev->vm_manager.pasid_lock); - return false; - } - - vm->fault_credit--; - spin_unlock(&adev->vm_manager.pasid_lock); - return true; -} - /** * amdgpu_vm_manager_init - init the VM manager * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 2a8898d19c8b..e8dcfd59fc93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -229,9 +229,6 @@ struct amdgpu_vm { /* Up to 128 pending retry page faults */ DECLARE_KFIFO(faults, u64, 128); - /* Limit non-retry fault storms */ - unsigned int fault_credit; - /* Points to the KFD process VM info */ struct amdkfd_process_info *process_info; @@ -299,8 +296,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid); void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); -bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, - unsigned int pasid); void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, struct amdgpu_bo_list_entry *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index b5775c6a857b..3e6c8c4067cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -237,23 +237,7 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev) */ static bool cik_ih_prescreen_iv(struct amdgpu_device *adev) { - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = 
le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; + return true; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index df5ac4d85a00..447b3cbc47e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -216,23 +216,7 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev) */ static bool cz_ih_prescreen_iv(struct amdgpu_device *adev) { - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; + return true; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index cf0fc61aebe6..2b94a6d1550e 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -216,23 +216,7 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev) */ static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev) { - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; + return true; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index dcdbb4d72472..9d7b43da6acc 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -227,23 +227,7 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev) */ static bool tonga_ih_prescreen_iv(struct amdgpu_device *adev) { - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; + return true; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index d84b687240d1..b49290bcf109 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -258,12 +258,9 @@ static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev) if (!pasid) return true; - /* Not a retry fault, check fault credit */ - if (!(dw5 & 0x80)) { - if (!amdgpu_vm_pasid_fault_credit(adev, pasid)) - goto ignore_iv; + /* Not a retry fault */ + if (!(dw5 & 0x80)) return true; - } /* Track retry faults in per-VM fault FIFO. 
*/ spin_lock(&adev->vm_manager.pasid_lock); From 22666cc1481ae3814d9c7718418cc4a3aa7d90c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 26 Sep 2018 11:50:09 +0200 Subject: [PATCH 50/60] drm/amdgpu: move IV prescreening into the GMC code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GMC/VM subsystem is causing the faults, so move the handling here as well. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 4 -- drivers/gpu/drm/amd/amdgpu/cik_ih.c | 13 ---- drivers/gpu/drm/amd/amdgpu/cz_ih.c | 13 ---- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 59 ++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 13 ---- drivers/gpu/drm/amd/amdgpu/si_ih.c | 14 ----- drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 13 ---- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 82 ------------------------- 9 files changed, 59 insertions(+), 154 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 9ce8c93ec19b..f877bb78d10a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -51,14 +51,12 @@ struct amdgpu_ih_ring { struct amdgpu_ih_funcs { /* ring read/write ptr handling, called from interrupt context */ u32 (*get_wptr)(struct amdgpu_device *adev); - bool (*prescreen_iv)(struct amdgpu_device *adev); void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); void (*set_rptr)(struct amdgpu_device *adev); }; #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev)) -#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev)) #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 79b6f456f2c5..b7968f426862 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -145,10 +145,6 @@ static void amdgpu_irq_callback(struct amdgpu_device *adev, u32 ring_index = ih->rptr >> 2; struct amdgpu_iv_entry entry; - /* Prescreening of high-frequency interrupts */ - if (!amdgpu_ih_prescreen_iv(adev)) - return; - entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; amdgpu_ih_decode_iv(adev, &entry); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 3e6c8c4067cb..8a8b4967a101 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -228,18 +228,6 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev) * [127:96] - reserved */ -/** - * cik_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. 
- */ -static bool cik_ih_prescreen_iv(struct amdgpu_device *adev) -{ - return true; -} - /** * cik_ih_decode_iv - decode an interrupt vector * @@ -445,7 +433,6 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = { static const struct amdgpu_ih_funcs cik_ih_funcs = { .get_wptr = cik_ih_get_wptr, - .prescreen_iv = cik_ih_prescreen_iv, .decode_iv = cik_ih_decode_iv, .set_rptr = cik_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 447b3cbc47e5..9d3ea298e116 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -207,18 +207,6 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev) return (wptr & adev->irq.ih.ptr_mask); } -/** - * cz_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool cz_ih_prescreen_iv(struct amdgpu_device *adev) -{ - return true; -} - /** * cz_ih_decode_iv - decode an interrupt vector * @@ -426,7 +414,6 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = { static const struct amdgpu_ih_funcs cz_ih_funcs = { .get_wptr = cz_ih_get_wptr, - .prescreen_iv = cz_ih_prescreen_iv, .decode_iv = cz_ih_decode_iv, .set_rptr = cz_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 016c7aab4a29..ce150de723c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -244,6 +244,62 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, return 0; } +/** + * vega10_ih_prescreen_iv - prescreen an interrupt vector + * + * @adev: amdgpu_device pointer + * + * Returns true if the interrupt vector should be further processed. + */ +static bool gmc_v9_0_prescreen_iv(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry, + uint64_t addr) +{ + struct amdgpu_vm *vm; + u64 key; + int r; + + /* No PASID, can't identify faulting process */ + if (!entry->pasid) + return true; + + /* Not a retry fault */ + if (!(entry->src_data[1] & 0x80)) + return true; + + /* Track retry faults in per-VM fault FIFO. */ + spin_lock(&adev->vm_manager.pasid_lock); + vm = idr_find(&adev->vm_manager.pasid_idr, entry->pasid); + if (!vm) { + /* VM not found, process it normally */ + spin_unlock(&adev->vm_manager.pasid_lock); + return true; + } + + key = AMDGPU_VM_FAULT(entry->pasid, addr); + r = amdgpu_vm_add_fault(vm->fault_hash, key); + + /* Hash table is full or the fault is already being processed, + * ignore further page faults + */ + if (r != 0) { + spin_unlock(&adev->vm_manager.pasid_lock); + return false; + } + /* No locking required with single writer and single reader */ + r = kfifo_put(&vm->faults, key); + if (!r) { + /* FIFO is full. 
Ignore it until there is space */ + amdgpu_vm_clear_fault(vm->fault_hash, key); + spin_unlock(&adev->vm_manager.pasid_lock); + return false; + } + + spin_unlock(&adev->vm_manager.pasid_lock); + /* It's the first fault for this address, process it normally */ + return true; +} + static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -255,6 +311,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; + if (!gmc_v9_0_prescreen_iv(adev, entry, addr)) + return 1; /* This also prevents sending it to KFD */ + if (!amdgpu_sriov_vf(adev)) { status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 2b94a6d1550e..a3984d10b604 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -207,18 +207,6 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev) return (wptr & adev->irq.ih.ptr_mask); } -/** - * iceland_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev) -{ - return true; -} - /** * iceland_ih_decode_iv - decode an interrupt vector * @@ -424,7 +412,6 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = { static const struct amdgpu_ih_funcs iceland_ih_funcs = { .get_wptr = iceland_ih_get_wptr, - .prescreen_iv = iceland_ih_prescreen_iv, .decode_iv = iceland_ih_decode_iv, .set_rptr = iceland_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index b3d7d9f83202..2938fb9f17cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -118,19 +118,6 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev) return (wptr & adev->irq.ih.ptr_mask); } -/** - * si_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool si_ih_prescreen_iv(struct amdgpu_device *adev) -{ - /* Process all interrupts */ - return true; -} - static void si_ih_decode_iv(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { @@ -301,7 +288,6 @@ static const struct amd_ip_funcs si_ih_ip_funcs = { static const struct amdgpu_ih_funcs si_ih_funcs = { .get_wptr = si_ih_get_wptr, - .prescreen_iv = si_ih_prescreen_iv, .decode_iv = si_ih_decode_iv, .set_rptr = si_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 9d7b43da6acc..15da06ddeb75 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -218,18 +218,6 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev) return (wptr & adev->irq.ih.ptr_mask); } -/** - * tonga_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. 
- */ -static bool tonga_ih_prescreen_iv(struct amdgpu_device *adev) -{ - return true; -} - /** * tonga_ih_decode_iv - decode an interrupt vector * @@ -490,7 +478,6 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = { static const struct amdgpu_ih_funcs tonga_ih_funcs = { .get_wptr = tonga_ih_get_wptr, - .prescreen_iv = tonga_ih_prescreen_iv, .decode_iv = tonga_ih_decode_iv, .set_rptr = tonga_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index b49290bcf109..2c250b01a903 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -219,87 +219,6 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev) return (wptr & adev->irq.ih.ptr_mask); } -/** - * vega10_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u32 dw0, dw3, dw4, dw5; - u16 pasid; - u64 addr, key; - struct amdgpu_vm *vm; - int r; - - dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); - dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]); - dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]); - - /* Filter retry page faults, let only the first one pass. If - * there are too many outstanding faults, ignore them until - * some faults get cleared. - */ - switch (dw0 & 0xff) { - case SOC15_IH_CLIENTID_VMC: - case SOC15_IH_CLIENTID_UTCL2: - break; - default: - /* Not a VM fault */ - return true; - } - - pasid = dw3 & 0xffff; - /* No PASID, can't identify faulting process */ - if (!pasid) - return true; - - /* Not a retry fault */ - if (!(dw5 & 0x80)) - return true; - - /* Track retry faults in per-VM fault FIFO. */ - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12); - key = AMDGPU_VM_FAULT(pasid, addr); - if (!vm) { - /* VM not found, process it normally */ - spin_unlock(&adev->vm_manager.pasid_lock); - return true; - } else { - r = amdgpu_vm_add_fault(vm->fault_hash, key); - - /* Hash table is full or the fault is already being processed, - * ignore further page faults - */ - if (r != 0) { - spin_unlock(&adev->vm_manager.pasid_lock); - goto ignore_iv; - } - } - /* No locking required with single writer and single reader */ - r = kfifo_put(&vm->faults, key); - if (!r) { - /* FIFO is full. 
Ignore it until there is space */ - amdgpu_vm_clear_fault(vm->fault_hash, key); - spin_unlock(&adev->vm_manager.pasid_lock); - goto ignore_iv; - } - - spin_unlock(&adev->vm_manager.pasid_lock); - /* It's the first fault for this address, process it normally */ - return true; - -ignore_iv: - adev->irq.ih.rptr += 32; - return false; -} - /** * vega10_ih_decode_iv - decode an interrupt vector * @@ -484,7 +403,6 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = { static const struct amdgpu_ih_funcs vega10_ih_funcs = { .get_wptr = vega10_ih_get_wptr, - .prescreen_iv = vega10_ih_prescreen_iv, .decode_iv = vega10_ih_decode_iv, .set_rptr = vega10_ih_set_rptr }; From 31edd7c0a813d9dc778fcb5da1eb8202c140dc4d Mon Sep 17 00:00:00 2001 From: Tiecheng Zhou Date: Fri, 7 Dec 2018 09:11:35 +0800 Subject: [PATCH 51/60] drm/amdgpu: bypass RLC init under sriov for Tonga (v2) RLC will go wrong in soft_reset under sriov. Workaround: only init the RLC csb and skip RLC stop, reset and start, because the host driver has already done the full RLC initialization. v2: squash in build fix Signed-off-by: Tiecheng Zhou Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1454fc306783..08f443fa7772 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4068,6 +4068,11 @@ static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) { + gfx_v8_0_init_csb(adev); + return 0; + } + adev->gfx.rlc.funcs->stop(adev); adev->gfx.rlc.funcs->reset(adev); gfx_v8_0_init_pg(adev); From 08d1bdd4cc57e89d037205687a61d2b6ff9dddd4 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 24 Oct 2018 16:10:33 +0800 Subject: [PATCH 52/60] drm/amdgpu: Limit vm max ctx number to 4096 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver needs to reserve resources for each ctx for some hw features, so add this limitation.
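As a rough illustration of how the new cap behaves (the fragment below is hypothetical and simplified; only idr_alloc() and the names visible in the diff are real): idr_alloc() treats its 'end' argument as exclusive, so with start 1 and end AMDGPU_VM_MAX_NUM_CTX the usable handles are 1..4095, and further allocations fail with -ENOSPC.

	/* Illustrative fragment, simplified from amdgpu_ctx_alloc() */
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX,
		      GFP_KERNEL);
	if (r < 0)
		return r;	/* -ENOSPC once the per-file ctx limit is reached */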
Reviewed-by: Christian König Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5752024aa572..88db3c263e5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -164,6 +164,7 @@ extern int amdgpu_si_support; extern int amdgpu_cik_support; #endif +#define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 95f4c4139fc6..d85184b5b35c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -248,7 +248,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, return -ENOMEM; mutex_lock(&mgr->lock); - r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); + r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL); if (r < 0) { mutex_unlock(&mgr->lock); kfree(ctx); From 2e431a1788c52d510bb24229cef93ba935f5785b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 8 Dec 2018 15:01:13 +0000 Subject: [PATCH 53/60] drm/amdgpu: remove set but not used variable 'grbm_soft_reset' Fixes gcc '-Wunused-but-set-variable' warning: drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c: In function 'gfx_v8_0_pre_soft_reset': drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c:4950:27: warning: variable 'srbm_soft_reset' set but not used [-Wunused-but-set-variable] drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c: In function 'gfx_v8_0_post_soft_reset': drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c:5054:27: warning: variable 'srbm_soft_reset' set but not used [-Wunused-but-set-variable] It never used since introduction in commit d31a501ead7f ("drm/amdgpu: add pre_soft_reset ip func") and e4ae0fc33631 ("drm/amdgpu: implement gfx8 post_soft_reset") Reviewed-by: Chunming Zhou Signed-off-by: YueHaibing Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 08f443fa7772..381f593b0cda 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4952,14 +4952,13 @@ static bool gfx_v8_0_check_soft_reset(void *handle) static int gfx_v8_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; + u32 grbm_soft_reset = 0; if ((!adev->gfx.grbm_soft_reset) && (!adev->gfx.srbm_soft_reset)) return 0; grbm_soft_reset = adev->gfx.grbm_soft_reset; - srbm_soft_reset = adev->gfx.srbm_soft_reset; /* stop the rlc */ adev->gfx.rlc.funcs->stop(adev); @@ -5056,14 +5055,13 @@ static int gfx_v8_0_soft_reset(void *handle) static int gfx_v8_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; + u32 grbm_soft_reset = 0; if ((!adev->gfx.grbm_soft_reset) && (!adev->gfx.srbm_soft_reset)) return 0; grbm_soft_reset = adev->gfx.grbm_soft_reset; - srbm_soft_reset = adev->gfx.srbm_soft_reset; if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || From 
816b6931315b641c5864cf33a9363cb89da05d0b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 6 Dec 2018 10:41:27 -0500 Subject: [PATCH 54/60] drm/amdgpu/powerplay: Add special avfs cases for some polaris asics (v3) Add special avfs handling for some polaris variants. v2: fix copy paste typo. v3: fix asic rid check Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- .../amd/powerplay/smumgr/polaris10_smumgr.c | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 94898b2da282..b3e06e498834 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -1639,6 +1639,7 @@ static int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct amdgpu_device *adev = hwmgr->adev; SMU74_Discrete_DpmTable *table = &(smu_data->smc_state_table); int result = 0; @@ -1658,6 +1659,59 @@ static int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) result = atomctrl_get_avfs_information(hwmgr, &avfs_params); + if (0 == result) { + if (((adev->pdev->device == 0x67ef) && + ((adev->pdev->revision == 0xe0) || + (adev->pdev->revision == 0xe5))) || + ((adev->pdev->device == 0x67ff) && + ((adev->pdev->revision == 0xcf) || + (adev->pdev->revision == 0xef) || + (adev->pdev->revision == 0xff)))) { + avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage = 1; + if ((adev->pdev->device == 0x67ef && adev->pdev->revision == 0xe5) || + (adev->pdev->device == 0x67ff && adev->pdev->revision == 0xef)) { + if ((avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0 == 0xEA522DD3) && + (avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1 == 0x5645A) && + (avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2 == 0x33F9E) && + (avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1 == 0xFFFFC5CC) && + (avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2 == 0x1B1A) && + (avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b == 0xFFFFFCED)) { + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0 = 0xF718F1D4; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1 = 0x323FD; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2 = 0x1E455; + avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = 0; + avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2 = 0; + avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b = 0x23; + } + } + } else if (hwmgr->chip_id == CHIP_POLARIS12 && !hwmgr->is_kicker) { + avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage = 1; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0 = 0xF6B024DD; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1 = 0x3005E; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2 = 0x18A5F; + avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = 0x315; + avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2 = 0xFED1; + avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b = 0x3B; + } else if (((adev->pdev->device == 0x67df) && + ((adev->pdev->revision == 0xe0) || + (adev->pdev->revision == 0xe3) || + (adev->pdev->revision == 0xe4) || + (adev->pdev->revision == 0xe5) || + (adev->pdev->revision == 0xe7) || + (adev->pdev->revision == 0xef))) || + ((adev->pdev->device == 0x6fdf) && + ((adev->pdev->revision == 0xef) || + (adev->pdev->revision == 0xff)))) { + avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage = 1; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0 = 0xF843B66B; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1 = 0x59CB5; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2 = 0xFFFF287F; + avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = 0; + 
avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2 = 0xFF23; + avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b = 0x58; + } + } + if (0 == result) { table->BTCGB_VDROOP_TABLE[0].a0 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0); table->BTCGB_VDROOP_TABLE[0].a1 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1); From 4f01b73e5adf5619d3e85a3c5845c89aad7a744d Mon Sep 17 00:00:00 2001 From: "Kuehling, Felix" Date: Mon, 10 Dec 2018 21:29:00 +0000 Subject: [PATCH 55/60] drm/amdgpu: Fix stub function name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function was renamed in a previous commit. Update the stub function name for builds with CONFIG_HSA_AMD disabled. Fixes: 611736d8447c ("drm/amdgpu: Add KFD VRAM limit checking") Acked-by: Andrey Grodzovsky Acked-by: Christian König Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 68e4cf1b655c..1e209e93dc9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -583,7 +583,7 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) return false; } -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) { } From 28c59469302a39277029b19672db32b0a5e9c30b Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 7 Dec 2018 12:15:01 -0500 Subject: [PATCH 56/60] Revert "drm/amd/display: Set RMX_ASPECT as default" This reverts commit 91b66c47ba3468f7882ea4a84d5e0e0c186b638f. Forcing RMX_ASPECT as default uses the preferred/native mode's timings for any mode the user selects and scales the image. This provides a consistently nicer result in the case where the selected mode's refresh rate matches the native mode's refresh rate, but this isn't always the case. For example, if the monitor is 1080p@144Hz and the preferred mode is 60Hz then even if the user selects 1080p@144Hz as their selected mode they'll get 1080p@60Hz. Signed-off-by: Nicholas Kazlauskas Acked-by: Alex Deucher Reviewed-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 943d1ae1de15..f8d2423cc19d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3100,10 +3100,8 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector, rmx_type = RMX_FULL; break; case DRM_MODE_SCALE_NONE: - rmx_type = RMX_OFF; - break; default: - rmx_type = RMX_ASPECT; + rmx_type = RMX_OFF; break; } @@ -3216,7 +3214,7 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state = kzalloc(sizeof(*state), GFP_KERNEL); if (state) { - state->scaling = RMX_ASPECT; + state->scaling = RMX_OFF; state->underscan_enable = false; state->underscan_hborder = 0; state->underscan_vborder = 0; From 65276f2bebea047338196a584291c6c8ccd7ea13 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 28 Nov 2018 16:17:50 -0500 Subject: [PATCH 57/60] drm/amd/display: Fix uninitialized max_bpc state values [Why] If the "max bpc" isn't explicitly set in the atomic state then it will have a value of 0.
This has the correct behavior of limiting a panel to 8bpc in the case where the panel supports 8bpc. In the case of eDP panels this isn't a true assumption - there are panels that can only do 6bpc. Banding occurs for these displays. [How] Initialize the max_bpc when the connector resets to 8bpc. Also carry over the value when the state is duplicated. Bugzilla: https://bugs.freedesktop.org/108825 Fixes: 307638884f72 ("drm/amd/display: Support amdgpu "max bpc" connector property") Signed-off-by: Nicholas Kazlauskas Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f8d2423cc19d..e40684a0ceb9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3218,6 +3218,7 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state->underscan_enable = false; state->underscan_hborder = 0; state->underscan_vborder = 0; + state->max_bpc = 8; __drm_atomic_helper_connector_reset(connector, &state->base); } @@ -3239,6 +3240,7 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector) new_state->freesync_capable = state->freesync_capable; new_state->abm_level = state->abm_level; + new_state->max_bpc = state->max_bpc; return &new_state->base; } From 922454c2e77c58062c81d33c564c5133343ab3e9 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 7 Dec 2018 10:07:09 -0500 Subject: [PATCH 58/60] drm/amd/display: Fix duplicating scaling/underscan connector state [Why] These properties aren't being carried over when the atomic state. This tricks atomic check and commit tail into performing underscan and scaling operations when they aren't needed. With the patch that forced scaling/RMX_ASPECT on by default this results in many unnecessary surface updates and hangs under certain conditions. [How] Duplicate the properties. Fixes: 91b66c47ba34 ("drm/amd/display: Set RMX_ASPECT as default") Signed-off-by: Nicholas Kazlauskas Acked-by: Alex Deucher Reviewed-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e40684a0ceb9..ad0848dbd909 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3240,6 +3240,10 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector) new_state->freesync_capable = state->freesync_capable; new_state->abm_level = state->abm_level; + new_state->scaling = state->scaling; + new_state->underscan_enable = state->underscan_enable; + new_state->underscan_hborder = state->underscan_hborder; + new_state->underscan_vborder = state->underscan_vborder; new_state->max_bpc = state->max_bpc; return &new_state->base; From fc42d47ce0118e2f59a67ac0b0da56f9dc454bd9 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Tue, 11 Dec 2018 15:31:35 -0500 Subject: [PATCH 59/60] drm/amdgpu: Enable GPU recovery by default for CI I retested Bonaire (gfx7 dGPU) and it works fine. 
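(For reference, and assuming the existing module parameter is unchanged: the default chosen here only applies while amdgpu.gpu_recovery is left at -1; passing gpu_recovery=0 or gpu_recovery=1 at module load time still forces recovery off or on regardless of ASIC, as seen in the amdgpu_gpu_recovery check in the hunk below.)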
Signed-off-by: Andrey Grodzovsky Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ef36cc595985..b60afeade50a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3258,6 +3258,8 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) if (amdgpu_gpu_recovery == -1) { switch (adev->asic_type) { + case CHIP_BONAIRE: + case CHIP_HAWAII: case CHIP_TOPAZ: case CHIP_TONGA: case CHIP_FIJI: From 674e78acae0dfb4beb56132e41cbae5b60f7d662 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 5 Dec 2018 14:59:07 -0500 Subject: [PATCH 60/60] drm/amd/display: Add fast path for cursor plane updates [Why] Legacy cursor plane updates from drm helpers go through the full atomic codepath. A high volume of cursor updates through this slow code path can cause subsequent page-flips to skip vblank intervals since each individual update is slow. This problem is particularly noticeable for the compton compositor. [How] A fast path for cursor plane updates is added by using DRM asynchronous commit support provided by async_check and async_update. These don't do a full state/flip_done dependency stall and they don't block other commit work. However, DC still expects itself to be single-threaded for anything that can issue register writes. Screen corruption or hangs can occur if write sequences overlap. Every call that potentially perform register writes needs to be guarded for asynchronous updates to work. The dc_lock mutex was added for this. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175 Signed-off-by: Nicholas Kazlauskas Acked-by: Andrey Grodzovsky Reviewed-by Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 8 +++ 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ad0848dbd909..c13856a46d8e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -57,6 +57,7 @@ #include #include +#include #include #include #include @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state); static int amdgpu_dm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state); +static void handle_cursor_update(struct drm_plane *plane, + struct drm_plane_state *old_plane_state); @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) /* Zero all the fields */ memset(&init_data, 0, sizeof(init_data)); + mutex_init(&adev->dm.dc_lock); + if(amdgpu_dm_irq_init(adev)) { DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n"); goto error; @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) /* DC Destroy TODO: Replace destroy DAL */ if (adev->dm.dc) dc_destroy(&adev->dm.dc); + + mutex_destroy(&adev->dm.dc_lock); + return; } @@ -3617,10 +3625,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane, return -EINVAL; } +static int dm_plane_atomic_async_check(struct drm_plane *plane, + struct drm_plane_state *new_plane_state) +{ + /* Only support async updates on cursor planes. 
*/ + if (plane->type != DRM_PLANE_TYPE_CURSOR) + return -EINVAL; + + return 0; +} + +static void dm_plane_atomic_async_update(struct drm_plane *plane, + struct drm_plane_state *new_state) +{ + struct drm_plane_state *old_state = + drm_atomic_get_old_plane_state(new_state->state, plane); + + if (plane->state->fb != new_state->fb) + drm_atomic_set_fb_for_plane(plane->state, new_state->fb); + + plane->state->src_x = new_state->src_x; + plane->state->src_y = new_state->src_y; + plane->state->src_w = new_state->src_w; + plane->state->src_h = new_state->src_h; + plane->state->crtc_x = new_state->crtc_x; + plane->state->crtc_y = new_state->crtc_y; + plane->state->crtc_w = new_state->crtc_w; + plane->state->crtc_h = new_state->crtc_h; + + handle_cursor_update(plane, old_state); +} + static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { .prepare_fb = dm_plane_helper_prepare_fb, .cleanup_fb = dm_plane_helper_cleanup_fb, .atomic_check = dm_plane_atomic_check, + .atomic_async_check = dm_plane_atomic_async_check, + .atomic_async_update = dm_plane_atomic_async_update }; /* @@ -4309,6 +4350,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, static void handle_cursor_update(struct drm_plane *plane, struct drm_plane_state *old_plane_state) { + struct amdgpu_device *adev = plane->dev->dev_private; struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb); struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc; struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL; @@ -4333,9 +4375,12 @@ static void handle_cursor_update(struct drm_plane *plane, if (!position.enable) { /* turn off cursor */ - if (crtc_state && crtc_state->stream) + if (crtc_state && crtc_state->stream) { + mutex_lock(&adev->dm.dc_lock); dc_stream_set_cursor_position(crtc_state->stream, &position); + mutex_unlock(&adev->dm.dc_lock); + } return; } @@ -4353,6 +4398,7 @@ static void handle_cursor_update(struct drm_plane *plane, attributes.pitch = attributes.width; if (crtc_state->stream) { + mutex_lock(&adev->dm.dc_lock); if (!dc_stream_set_cursor_attributes(crtc_state->stream, &attributes)) DRM_ERROR("DC failed to set cursor attributes\n"); @@ -4360,6 +4406,7 @@ static void handle_cursor_update(struct drm_plane *plane, if (!dc_stream_set_cursor_position(crtc_state->stream, &position)) DRM_ERROR("DC failed to set cursor position\n"); + mutex_unlock(&adev->dm.dc_lock); } } @@ -4575,6 +4622,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, &acrtc_state->stream->vrr_infopacket; } + mutex_lock(&adev->dm.dc_lock); dc_commit_updates_for_stream(adev->dm.dc, surface_updates, 1, @@ -4582,6 +4630,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, &stream_update, &surface_updates->surface, state); + mutex_unlock(&adev->dm.dc_lock); DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n", __func__, @@ -4596,6 +4645,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, * with a dc_plane_state and follow the atomic model a bit more closely here. 
*/ static bool commit_planes_to_stream( + struct amdgpu_display_manager *dm, struct dc *dc, struct dc_plane_state **plane_states, uint8_t new_plane_count, @@ -4672,11 +4722,13 @@ static bool commit_planes_to_stream( updates[i].scaling_info = &scaling_info[i]; } + mutex_lock(&dm->dc_lock); dc_commit_updates_for_stream( dc, updates, new_plane_count, dc_stream, stream_update, plane_states, state); + mutex_unlock(&dm->dc_lock); kfree(flip_addr); kfree(plane_info); @@ -4782,7 +4834,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, dc_stream_attach->abm_level = acrtc_state->abm_level; - if (false == commit_planes_to_stream(dm->dc, + if (false == commit_planes_to_stream(dm, + dm->dc, plane_states_constructed, planes_count, acrtc_state, @@ -4952,7 +5005,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (dc_state) { dm_enable_per_frame_crtc_master_sync(dc_state); + mutex_lock(&dm->dc_lock); WARN_ON(!dc_commit_state(dm->dc, dc_state)); + mutex_unlock(&dm->dc_lock); } for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { @@ -5014,6 +5069,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) /*TODO How it works with MPO ?*/ if (!commit_planes_to_stream( + dm, dm->dc, status->plane_states, status->plane_count, @@ -5906,6 +5962,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, ret = -EINVAL; goto fail; } + } else if (state->legacy_cursor_update) { + /* + * This is a fast cursor update coming from the plane update + * helper, check if it can be done asynchronously for better + * performance. + */ + state->async_update = !drm_atomic_helper_async_check(dev, state); } /* Must be success */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 4326dc256491..25bb91ee80ba 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -134,6 +134,14 @@ struct amdgpu_display_manager { struct drm_modeset_lock atomic_obj_lock; + /** + * @dc_lock: + * + * Guards access to DC functions that can issue register write + * sequences. + */ + struct mutex dc_lock; + /** * @irq_handler_list_low_tab: *