Merge branch 'bugfix/rich-media-non-unicode' into 'develop'

rich media non-unicode bugfix

See merge request pleroma/pleroma!749
This commit is contained in:
lambda 2019-01-31 16:54:48 +00:00
commit 44913c1019
4 changed files with 4948 additions and 13 deletions

View File

@ -30,7 +30,7 @@ defmodule Pleroma.Web.RichMedia.Parser do
try do
{:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: [pool: :media])
html |> maybe_parse() |> get_parsed_data()
html |> maybe_parse() |> clean_parsed_data() |> check_parsed_data()
rescue
e ->
{:error, "Parsing error: #{inspect(e)}"}
@ -46,11 +46,33 @@ defmodule Pleroma.Web.RichMedia.Parser do
end)
end
defp get_parsed_data(%{title: title} = data) when is_binary(title) and byte_size(title) > 0 do
defp check_parsed_data(%{title: title} = data) when is_binary(title) and byte_size(title) > 0 do
{:ok, data}
end
defp get_parsed_data(data) do
defp check_parsed_data(data) do
{:error, "Found metadata was invalid or incomplete: #{inspect(data)}"}
end
# Checks a single metadata value.
#
# Binaries are run through `:unicode.characters_to_binary/1` and the raw
# conversion result is handed to `clean_string/1`, which turns it into an
# `{:ok, _}` / `{:error, _}` tuple.
defp string_is_valid_unicode(data) when is_binary(data) do
  converted = :unicode.characters_to_binary(data)
  clean_string(converted)
end

# Non-binary values are not inspected and are always accepted as-is.
defp string_is_valid_unicode(data), do: {:ok, data}
# Normalises the result of `:unicode.characters_to_binary/1` into a tagged tuple.
#
# The conversion can fail in two ways, both meaning the input was not valid
# Unicode:
#   * `{:error, converted, rest}`      - an invalid byte sequence was found
#   * `{:incomplete, converted, rest}` - the input ended in the middle of a
#                                        multi-byte character
# Both are reported as `{:error, "Invalid data"}`. Anything else is treated as
# the successfully converted value and returned as `{:ok, value}`.
defp clean_string({:error, _, _}), do: {:error, "Invalid data"}
# Without this clause a truncated string would fall through to the catch-all
# below and be wrapped as `{:ok, {:incomplete, _, _}}`, i.e. reported as valid.
defp clean_string({:incomplete, _, _}), do: {:error, "Invalid data"}
defp clean_string(data), do: {:ok, data}
# Drops every key/value pair whose value fails `string_is_valid_unicode/1`
# and rebuilds the remaining pairs into a map.
defp clean_parsed_data(data) do
  valid_pairs =
    Enum.filter(data, fn {_key, value} ->
      # Keep the pair only when the validity check answers `{:ok, _}`.
      match?({:ok, _}, string_is_valid_unicode(value))
    end)

  Map.new(valid_pairs)
end
end

File diff suppressed because one or more lines are too long

View File

@ -143,7 +143,10 @@ defmodule HttpRequestMock do
}}
end
def get("https://squeet.me/xrd/?uri=lain@squeet.me", _, _,
def get(
"https://squeet.me/xrd/?uri=lain@squeet.me",
_,
_,
Accept: "application/xrd+xml,application/jrd+json"
) do
{:ok,
@ -153,7 +156,10 @@ defmodule HttpRequestMock do
}}
end
def get("https://mst3k.interlinked.me/users/luciferMysticus", _, _,
def get(
"https://mst3k.interlinked.me/users/luciferMysticus",
_,
_,
Accept: "application/activity+json"
) do
{:ok,
@ -171,7 +177,10 @@ defmodule HttpRequestMock do
}}
end
def get("https://hubzilla.example.org/channel/kaniini", _, _,
def get(
"https://hubzilla.example.org/channel/kaniini",
_,
_,
Accept: "application/activity+json"
) do
{:ok,
@ -248,7 +257,10 @@ defmodule HttpRequestMock do
}}
end
def get("http://mastodon.example.org/@admin/99541947525187367", _, _,
def get(
"http://mastodon.example.org/@admin/99541947525187367",
_,
_,
Accept: "application/activity+json"
) do
{:ok,
@ -274,7 +286,10 @@ defmodule HttpRequestMock do
}}
end
def get("https://mstdn.io/users/mayuutann/statuses/99568293732299394", _, _,
def get(
"https://mstdn.io/users/mayuutann/statuses/99568293732299394",
_,
_,
Accept: "application/activity+json"
) do
{:ok,
@ -429,7 +444,10 @@ defmodule HttpRequestMock do
}}
end
def get("https://social.sakamoto.gq/objects/0ccc1a2c-66b0-4305-b23a-7f7f2b040056", _, _,
def get(
"https://social.sakamoto.gq/objects/0ccc1a2c-66b0-4305-b23a-7f7f2b040056",
_,
_,
Accept: "application/atom+xml"
) do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/httpoison_mock/sakamoto.atom")}}
@ -510,7 +528,10 @@ defmodule HttpRequestMock do
%Tesla.Env{status: 200, body: File.read!("test/fixtures/httpoison_mock/squeet.me_host_meta")}}
end
def get("https://squeet.me/xrd?uri=lain@squeet.me", _, _,
def get(
"https://squeet.me/xrd?uri=lain@squeet.me",
_,
_,
Accept: "application/xrd+xml,application/jrd+json"
) do
{:ok,
@ -541,7 +562,10 @@ defmodule HttpRequestMock do
}}
end
def get("http://framatube.org/main/xrd?uri=framasoft@framatube.org", _, _,
def get(
"http://framatube.org/main/xrd?uri=framasoft@framatube.org",
_,
_,
Accept: "application/xrd+xml,application/jrd+json"
) do
{:ok,
@ -560,7 +584,10 @@ defmodule HttpRequestMock do
}}
end
def get("http://gnusocial.de/main/xrd?uri=winterdienst@gnusocial.de", _, _,
def get(
"http://gnusocial.de/main/xrd?uri=winterdienst@gnusocial.de",
_,
_,
Accept: "application/xrd+xml,application/jrd+json"
) do
{:ok,
@ -594,7 +621,10 @@ defmodule HttpRequestMock do
}}
end
def get("https://gerzilla.de/xrd/?uri=kaniini@gerzilla.de", _, _,
def get(
"https://gerzilla.de/xrd/?uri=kaniini@gerzilla.de",
_,
_,
Accept: "application/xrd+xml,application/jrd+json"
) do
{:ok,
@ -657,6 +687,11 @@ defmodule HttpRequestMock do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}}
end
# Mock response for "http://example.com/malformed": a 200 reply whose body is
# read from the `malformed-data.html` rich-media fixture.
def get("http://example.com/malformed", _, _, _) do
  body = File.read!("test/fixtures/rich_media/malformed-data.html")
  {:ok, %Tesla.Env{status: 200, body: body}}
end
# Mock response for "http://example.com/empty": a 200 reply whose body is the
# plain string "hello".
def get("http://example.com/empty", _, _, _) do
  response = %Tesla.Env{status: 200, body: "hello"}
  {:ok, response}
end

View File

@ -88,4 +88,8 @@ defmodule Pleroma.Web.RichMedia.ParserTest do
width: "1024"
}}
end
# Parsing the mocked "malformed" URL must produce an error tuple.
test "rejects invalid OGP data" do
  result = Pleroma.Web.RichMedia.Parser.parse("http://example.com/malformed")

  assert {:error, _} = result
end
end