From 6b7b443ff95587b33f4b666e68ed82dc6fb485a5 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Tue, 6 Feb 2024 14:34:59 -0500 Subject: [PATCH] Pleroma.Web.RichMedia.Parser: Remove test-specific codepaths Also consolidate Tesla mocks into the HttpRequestMock module. Tests were not exercising the real codepaths. The Rich Media Preview only works with https, but most of these tests were only mocking http. --- lib/pleroma/caching.ex | 3 + lib/pleroma/web/rich_media/parser.ex | 101 ++++++++--------- test/fixtures/rich_media/oembed.html | 2 +- .../controllers/status_controller_test.exs | 8 +- .../chat_message_reference_view_test.exs | 1 + test/pleroma/web/rich_media/helpers_test.exs | 4 +- test/pleroma/web/rich_media/parser_test.exs | 105 +++--------------- test/support/cachex_proxy.ex | 6 + test/support/http_request_mock.ex | 61 +++++++++- test/support/null_cache.ex | 6 + 10 files changed, 144 insertions(+), 153 deletions(-) diff --git a/lib/pleroma/caching.ex b/lib/pleroma/caching.ex index eb0588708..796a465af 100644 --- a/lib/pleroma/caching.ex +++ b/lib/pleroma/caching.ex @@ -8,10 +8,13 @@ defmodule Pleroma.Caching do @callback put(Cachex.cache(), any(), any(), Keyword.t()) :: {Cachex.status(), boolean()} @callback put(Cachex.cache(), any(), any()) :: {Cachex.status(), boolean()} @callback fetch!(Cachex.cache(), any(), function() | nil) :: any() + @callback fetch(Cachex.cache(), any(), function() | nil) :: + {atom(), any()} | {atom(), any(), any()} # @callback del(Cachex.cache(), any(), Keyword.t()) :: {Cachex.status(), boolean()} @callback del(Cachex.cache(), any()) :: {Cachex.status(), boolean()} @callback stream!(Cachex.cache(), any()) :: Enumerable.t() @callback expire_at(Cachex.cache(), binary(), number()) :: {Cachex.status(), boolean()} + @callback expire(Cachex.cache(), binary(), number()) :: {Cachex.status(), boolean()} @callback exists?(Cachex.cache(), any()) :: {Cachex.status(), boolean()} @callback execute!(Cachex.cache(), function()) :: any() @callback get_and_update(Cachex.cache(), any(), function()) :: diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index 837182d8a..a791b2bab 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -13,70 +13,65 @@ defmodule Pleroma.Web.RichMedia.Parser do def parse(nil), do: {:error, "No URL provided"} - if Pleroma.Config.get(:env) == :test do - @spec parse(String.t()) :: {:ok, map()} | {:error, any()} - def parse(url), do: parse_url(url) - else - @spec parse(String.t()) :: {:ok, map()} | {:error, any()} - def parse(url) do - with {:ok, data} <- get_cached_or_parse(url), - {:ok, _} <- set_ttl_based_on_image(data, url) do - {:ok, data} - end + @spec parse(String.t()) :: {:ok, map()} | {:error, any()} + def parse(url) do + with {:ok, data} <- get_cached_or_parse(url), + {:ok, _} <- set_ttl_based_on_image(data, url) do + {:ok, data} end + end - defp get_cached_or_parse(url) do - case @cachex.fetch(:rich_media_cache, url, fn -> - case parse_url(url) do - {:ok, _} = res -> - {:commit, res} + defp get_cached_or_parse(url) do + case @cachex.fetch(:rich_media_cache, url, fn -> + case parse_url(url) do + {:ok, _} = res -> + {:commit, res} - {:error, reason} = e -> - # Unfortunately we have to log errors here, instead of doing that - # along with ttl setting at the bottom. Otherwise we can get log spam - # if more than one process was waiting for the rich media card - # while it was generated. Ideally we would set ttl here as well, - # so we don't override it number_of_waiters_on_generation - # times, but one, obviously, can't set ttl for not-yet-created entry - # and Cachex doesn't support returning ttl from the fetch callback. - log_error(url, reason) - {:commit, e} - end - end) do - {action, res} when action in [:commit, :ok] -> - case res do - {:ok, _data} = res -> - res + {:error, reason} = e -> + # Unfortunately we have to log errors here, instead of doing that + # along with ttl setting at the bottom. Otherwise we can get log spam + # if more than one process was waiting for the rich media card + # while it was generated. Ideally we would set ttl here as well, + # so we don't override it number_of_waiters_on_generation + # times, but one, obviously, can't set ttl for not-yet-created entry + # and Cachex doesn't support returning ttl from the fetch callback. + log_error(url, reason) + {:commit, e} + end + end) do + {action, res} when action in [:commit, :ok] -> + case res do + {:ok, _data} = res -> + res - {:error, reason} = e -> - if action == :commit, do: set_error_ttl(url, reason) - e - end + {:error, reason} = e -> + if action == :commit, do: set_error_ttl(url, reason) + e + end - {:error, e} -> - {:error, {:cachex_error, e}} - end + {:error, e} -> + {:error, {:cachex_error, e}} end + end - defp set_error_ttl(_url, :body_too_large), do: :ok - defp set_error_ttl(_url, {:content_type, _}), do: :ok + defp set_error_ttl(_url, :body_too_large), do: :ok + defp set_error_ttl(_url, {:content_type, _}), do: :ok - # The TTL is not set for the errors above, since they are unlikely to change - # with time + # The TTL is not set for the errors above, since they are unlikely to change + # with time - defp set_error_ttl(url, _reason) do - ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000) - @cachex.expire(:rich_media_cache, url, ttl) - :ok - end + defp set_error_ttl(url, _reason) do + ttl = Pleroma.Config.get([:rich_media, :failure_backoff], 60_000) + @cachex.expire(:rich_media_cache, url, ttl) + :ok + end - defp log_error(url, {:invalid_metadata, data}) do - Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end) - end + defp log_error(url, {:invalid_metadata, data}) do + Logger.debug(fn -> "Incomplete or invalid metadata for #{url}: #{inspect(data)}" end) + end - defp log_error(url, reason) do - Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end) - end + defp log_error(url, reason) do + Logger.warning(fn -> "Rich media error for #{url}: #{inspect(reason)}" end) end @doc """ diff --git a/test/fixtures/rich_media/oembed.html b/test/fixtures/rich_media/oembed.html index 55f17004b..5429630d0 100644 --- a/test/fixtures/rich_media/oembed.html +++ b/test/fixtures/rich_media/oembed.html @@ -1,3 +1,3 @@ diff --git a/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs index 7bbe46cbe..f95f15ec3 100644 --- a/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs @@ -336,13 +336,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusControllerTest do path -> Pleroma.Test.StaticConfig.get(path) end) - Tesla.Mock.mock(fn - %{ - method: :get, - url: "https://example.com/twitter-card" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")} - + Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) diff --git a/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs b/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs index 44d40269c..c8b3cb391 100644 --- a/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs +++ b/test/pleroma/web/pleroma_api/views/chat_message_reference_view_test.exs @@ -49,6 +49,7 @@ defmodule Pleroma.Web.PleromaAPI.ChatMessageReferenceViewTest do :chat_message_id_idempotency_key_cache, ^id -> {:ok, "123"} cache, key -> NullCache.get(cache, key) end) + |> stub(:fetch, fn :rich_media_cache, _, _ -> {:ok, {:ok, %{}}} end) chat_message = MessageReferenceView.render("show.json", chat_message_reference: cm_ref) diff --git a/test/pleroma/web/rich_media/helpers_test.exs b/test/pleroma/web/rich_media/helpers_test.exs index bf7372476..13d2341ad 100644 --- a/test/pleroma/web/rich_media/helpers_test.exs +++ b/test/pleroma/web/rich_media/helpers_test.exs @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.HelpersTest do - use Pleroma.DataCase, async: true + use Pleroma.DataCase, async: false alias Pleroma.StaticStubbedConfigMock, as: ConfigMock alias Pleroma.Web.CommonAPI @@ -14,7 +14,7 @@ defmodule Pleroma.Web.RichMedia.HelpersTest do import Tesla.Mock setup do - mock(fn env -> apply(HttpRequestMock, :request, [env]) end) + mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) ConfigMock |> stub(:get, fn diff --git a/test/pleroma/web/rich_media/parser_test.exs b/test/pleroma/web/rich_media/parser_test.exs index 9064138a6..a05b89a2b 100644 --- a/test/pleroma/web/rich_media/parser_test.exs +++ b/test/pleroma/web/rich_media/parser_test.exs @@ -3,95 +3,26 @@ # SPDX-License-Identifier: AGPL-3.0-only defmodule Pleroma.Web.RichMedia.ParserTest do - use ExUnit.Case, async: true + use Pleroma.DataCase, async: false alias Pleroma.Web.RichMedia.Parser + import Tesla.Mock + setup do - Tesla.Mock.mock(fn - %{ - method: :get, - url: "http://example.com/ogp" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")} - - %{ - method: :get, - url: "http://example.com/non-ogp" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")} - - %{ - method: :get, - url: "http://example.com/ogp-missing-title" - } -> - %Tesla.Env{ - status: 200, - body: File.read!("test/fixtures/rich_media/ogp-missing-title.html") - } - - %{ - method: :get, - url: "http://example.com/twitter-card" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")} - - %{ - method: :get, - url: "http://example.com/oembed" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")} - - %{ - method: :get, - url: "http://example.com/oembed.json" - } -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")} - - %{method: :get, url: "http://example.com/empty"} -> - %Tesla.Env{status: 200, body: "hello"} - - %{method: :get, url: "http://example.com/malformed"} -> - %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")} - - %{method: :get, url: "http://example.com/error"} -> - {:error, :overload} - - %{ - method: :head, - url: "http://example.com/huge-page" - } -> - %Tesla.Env{ - status: 200, - headers: [{"content-length", "2000001"}, {"content-type", "text/html"}] - } - - %{ - method: :head, - url: "http://example.com/pdf-file" - } -> - %Tesla.Env{ - status: 200, - headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}] - } - - %{method: :head} -> - %Tesla.Env{status: 404, body: "", headers: []} - end) - - :ok + mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) end test "returns error when no metadata present" do - assert {:error, _} = Parser.parse("http://example.com/empty") + assert {:error, _} = Parser.parse("https://example.com/empty") end test "doesn't just add a title" do - assert {:error, {:invalid_metadata, _}} = Parser.parse("http://example.com/non-ogp") + assert {:error, {:invalid_metadata, _}} = Parser.parse("https://example.com/non-ogp") end test "parses ogp" do - assert Parser.parse("http://example.com/ogp") == + assert Parser.parse("https://example.com/ogp") == {:ok, %{ "image" => "http://ia.media-imdb.com/images/rock.jpg", @@ -99,12 +30,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do "description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "type" => "video.movie", - "url" => "http://example.com/ogp" + "url" => "https://example.com/ogp" }} end test "falls back to when ogp:title is missing" do - assert Parser.parse("http://example.com/ogp-missing-title") == + assert Parser.parse("https://example.com/ogp-missing-title") == {:ok, %{ "image" => "http://ia.media-imdb.com/images/rock.jpg", @@ -112,12 +43,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do "description" => "Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer.", "type" => "video.movie", - "url" => "http://example.com/ogp-missing-title" + "url" => "https://example.com/ogp-missing-title" }} end test "parses twitter card" do - assert Parser.parse("http://example.com/twitter-card") == + assert Parser.parse("https://example.com/twitter-card") == {:ok, %{ "card" => "summary", @@ -125,12 +56,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do "image" => "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg", "title" => "Small Island Developing States Photo Submission", "description" => "View the album on Flickr.", - "url" => "http://example.com/twitter-card" + "url" => "https://example.com/twitter-card" }} end test "parses OEmbed and filters HTML tags" do - assert Parser.parse("http://example.com/oembed") == + assert Parser.parse("https://example.com/oembed") == {:ok, %{ "author_name" => "\u202E\u202D\u202Cbees\u202C", @@ -150,7 +81,7 @@ defmodule Pleroma.Web.RichMedia.ParserTest do "thumbnail_width" => 150, "title" => "Bacon Lollys", "type" => "photo", - "url" => "http://example.com/oembed", + "url" => "https://example.com/oembed", "version" => "1.0", "web_page" => "https://www.flickr.com/photos/bees/2362225867/", "web_page_short_url" => "https://flic.kr/p/4AK2sc", @@ -159,18 +90,18 @@ defmodule Pleroma.Web.RichMedia.ParserTest do end test "rejects invalid OGP data" do - assert {:error, _} = Parser.parse("http://example.com/malformed") + assert {:error, _} = Parser.parse("https://example.com/malformed") end test "returns error if getting page was not successful" do - assert {:error, :overload} = Parser.parse("http://example.com/error") + assert {:error, :overload} = Parser.parse("https://example.com/error") end test "does a HEAD request to check if the body is too large" do - assert {:error, :body_too_large} = Parser.parse("http://example.com/huge-page") + assert {:error, :body_too_large} = Parser.parse("https://example.com/huge-page") end test "does a HEAD request to check if the body is html" do - assert {:error, {:content_type, _}} = Parser.parse("http://example.com/pdf-file") + assert {:error, {:content_type, _}} = Parser.parse("https://example.com/pdf-file") end end diff --git a/test/support/cachex_proxy.ex b/test/support/cachex_proxy.ex index 83ae5610f..8f27986a9 100644 --- a/test/support/cachex_proxy.ex +++ b/test/support/cachex_proxy.ex @@ -26,9 +26,15 @@ defmodule Pleroma.CachexProxy do @impl true defdelegate fetch!(cache, key, func), to: Cachex + @impl true + defdelegate fetch(cache, key, func), to: Cachex + @impl true defdelegate expire_at(cache, str, num), to: Cachex + @impl true + defdelegate expire(cache, str, num), to: Cachex + @impl true defdelegate exists?(cache, key), to: Cachex diff --git a/test/support/http_request_mock.ex b/test/support/http_request_mock.ex index df3371a75..f4b6f1f9f 100644 --- a/test/support/http_request_mock.ex +++ b/test/support/http_request_mock.ex @@ -1059,7 +1059,7 @@ defmodule HttpRequestMock do }} end - def get("http://example.com/malformed", _, _, _) do + def get("https://example.com/malformed", _, _, _) do {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/malformed-data.html")}} end @@ -1472,6 +1472,37 @@ defmodule HttpRequestMock do {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/yahoo.html")}} end + def get("https://example.com/error", _, _, _), do: {:error, :overload} + + def get("https://example.com/ogp-missing-title", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + body: File.read!("test/fixtures/rich_media/ogp-missing-title.html") + }} + end + + def get("https://example.com/oembed", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.html")}} + end + + def get("https://example.com/oembed.json", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/oembed.json")}} + end + + def get("https://example.com/twitter-card", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")}} + end + + def get("https://example.com/non-ogp", _, _, _) do + {:ok, + %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/non_ogp_embed.html")}} + end + + def get("https://example.com/empty", _, _, _) do + {:ok, %Tesla.Env{status: 200, body: "hello"}} + end + def get(url, query, body, headers) do {:error, "Mock response not implemented for GET #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"} @@ -1545,17 +1576,41 @@ defmodule HttpRequestMock do # Most of the rich media mocks are missing HEAD requests, so we just return 404. @rich_media_mocks [ + "https://example.com/empty", + "https://example.com/error", + "https://example.com/malformed", + "https://example.com/non-ogp", + "https://example.com/oembed", + "https://example.com/oembed.json", "https://example.com/ogp", "https://example.com/ogp-missing-data", + "https://example.com/ogp-missing-title", "https://example.com/twitter-card", "https://google.com/", - "https://yahoo.com/", - "https://pleroma.local/notice/9kCP7V" + "https://pleroma.local/notice/9kCP7V", + "https://yahoo.com/" ] + def head(url, _query, _body, _headers) when url in @rich_media_mocks do {:ok, %Tesla.Env{status: 404, body: ""}} end + def head("https://example.com/pdf-file", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + headers: [{"content-length", "1000000"}, {"content-type", "application/pdf"}] + }} + end + + def head("https://example.com/huge-page", _, _, _) do + {:ok, + %Tesla.Env{ + status: 200, + headers: [{"content-length", "2000001"}, {"content-type", "text/html"}] + }} + end + def head(url, query, body, headers) do {:error, "Mock response not implemented for HEAD #{inspect(url)}, #{query}, #{inspect(body)}, #{inspect(headers)}"} diff --git a/test/support/null_cache.ex b/test/support/null_cache.ex index 9f1d45f1d..47c84174e 100644 --- a/test/support/null_cache.ex +++ b/test/support/null_cache.ex @@ -28,6 +28,9 @@ defmodule Pleroma.NullCache do end end + @impl true + def fetch(_, key, func), do: func.(key) + @impl true def get_and_update(_, _, func) do func.(nil) @@ -36,6 +39,9 @@ defmodule Pleroma.NullCache do @impl true def expire_at(_, _, _), do: {:ok, true} + @impl true + def expire(_, _, _), do: {:ok, true} + @impl true def exists?(_, _), do: {:ok, false}