From acb03d591bea1b20a715201f479f1ad7bf7bb67b Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Fri, 31 Jul 2020 16:46:35 +0200 Subject: [PATCH] Insert text representation of hashtags into object["hashtags"] Includes a new mix task: pleroma.database fill_old_hashtags --- CHANGELOG.md | 2 +- docs/administration/CLI_tasks/database.md | 10 +++++ lib/mix/tasks/pleroma/database.ex | 43 +++++++++++++++++++ lib/pleroma/activity/ir/topics.ex | 4 ++ lib/pleroma/constants.ex | 3 +- lib/pleroma/web/activity_pub/activity_pub.ex | 8 ++-- .../web/activity_pub/mrf/simple_policy.ex | 8 ++-- .../web/activity_pub/transmogrifier.ex | 15 +++---- lib/pleroma/web/common_api/utils.ex | 11 ++++- .../web/mastodon_api/views/status_view.ex | 2 +- .../templates/feed/feed/_activity.atom.eex | 4 +- .../web/templates/feed/feed/_activity.rss.eex | 4 +- .../feed/feed/_tag_activity.atom.eex | 4 +- ...31165800_add_hashtags_index_to_objects.exs | 11 +++++ test/pleroma/activity/ir/topics_test.exs | 2 +- .../activity_pub/mrf/simple_policy_test.exs | 6 +-- .../transmogrifier/note_handling_test.exs | 4 +- .../web/activity_pub/transmogrifier_test.exs | 37 +++++++++------- test/pleroma/web/common_api/utils_test.exs | 3 +- test/pleroma/web/common_api_test.exs | 3 +- .../mastodon_api/views/status_view_test.exs | 4 +- test/support/factory.ex | 2 +- 22 files changed, 139 insertions(+), 51 deletions(-) create mode 100644 priv/repo/migrations/20200731165800_add_hashtags_index_to_objects.exs diff --git a/CHANGELOG.md b/CHANGELOG.md index c6bf38ee0..a5e5f5ecc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed +- **Breaking:** Changed storage of hashtags in plain-text to `object->hashtags`, run [`pleroma.database fill_old_hashtags` mix task](docs/administration/CLI_tasks/database.md) for old objects (works while pleroma is running). 
- Polls now always return a `voters_count`, even if they are single-choice. - Admin Emails: The ap id is used as the user link in emails now. - Improved registration workflow for email confirmation and account approval modes.
@@ -432,7 +433,6 @@ switched to a new configuration mechanism, however it was not officially removed
 - Static-FE: Fix remote posts not being sanitized
 
 ### Fixed
-=======
 - Rate limiter crashes when there is no explicitly specified ip in the config
 - 500 errors when no `Accept` header is present if Static-FE is enabled
 - Instance panel not being updated immediately due to wrong `Cache-Control` headers
diff --git a/docs/administration/CLI_tasks/database.md b/docs/administration/CLI_tasks/database.md
index 6dca83167..a2d2e8cdd 100644
--- a/docs/administration/CLI_tasks/database.md
+++ b/docs/administration/CLI_tasks/database.md
@@ -91,6 +91,16 @@ Can be safely re-run
 mix pleroma.database fix_likes_collections
 ```
 
+## Fill hashtags for old objects
+
+```sh tab="OTP"
+./bin/pleroma_ctl database fill_old_hashtags
+```
+
+```sh tab="From Source"
+mix pleroma.database fill_old_hashtags
+```
+
 ## Vacuum the database
 
 ### Analyze
diff --git a/lib/mix/tasks/pleroma/database.ex b/lib/mix/tasks/pleroma/database.ex
index 22151ce08..0c1343313 100644
--- a/lib/mix/tasks/pleroma/database.ex
+++ b/lib/mix/tasks/pleroma/database.ex
@@ -128,6 +128,56 @@ defmodule Mix.Tasks.Pleroma.Database do
     |> Stream.run()
   end
 
+  # Backfills `data["hashtags"]` on objects that have a non-empty `data["tag"]`
+  # but no `data["hashtags"]` yet, copying over the plain-string entries of "tag".
+  # Each chunk yielded by Repo.chunk_stream is updated inside its own transaction.
+  def run(["fill_old_hashtags"]) do
+    import Ecto.Query
+
+    start_pleroma()
+
+    from(
+      o in Object,
+      where: fragment("(?)->>'hashtags' is null", o.data),
+      where: fragment("(?)->>'tag' != '[]'", o.data),
+      # inserted_at is selected because the progress log below reads it
+      select: %{id: o.id, inserted_at: o.inserted_at, tag: fragment("(?)->>'tag'", o.data)},
+      # NOTE(review): ascending id is assumed to match how Repo.chunk_stream pages
+      # through the query -- confirm against its implementation
+      order_by: [asc: o.id]
+    )
+    |> Pleroma.Repo.chunk_stream(200, :batches)
+    |> Stream.each(fn objects ->
+      Repo.transaction(fn ->
+        objects_first = List.first(objects)
+        objects_last = List.last(objects)
+
+        Logger.info(
+          "fill_old_hashtags: #{objects_first.id} (#{objects_first.inserted_at}) -- #{
+            objects_last.id
+          } (#{objects_last.inserted_at})"
+        )
+
+        # the per-object updates are run purely for their side effect
+        Enum.each(objects, fn object ->
+          # "tag" was selected as JSON text; keep only its string entries
+          tags =
+            object.tag
+            |> Jason.decode!()
+            |> Enum.filter(&is_binary/1)
+
+          Object
+          |> where([o], o.id == ^object.id)
+          |> update([o],
+            set: [data: fragment("safe_jsonb_set(?, '{hashtags}', ?, true)", o.data, ^tags)]
+          )
+          |> Repo.update_all([], timeout: :infinity)
+        end)
+      end)
+    end)
+    |> Stream.run()
+  end
+
   def run(["vacuum", args]) do
     start_pleroma()
 
diff --git a/lib/pleroma/activity/ir/topics.ex b/lib/pleroma/activity/ir/topics.ex
index fe2e8cb5c..2cdecf1e4 100644
--- a/lib/pleroma/activity/ir/topics.ex
+++ b/lib/pleroma/activity/ir/topics.ex
@@ -48,6 +48,10 @@ defmodule Pleroma.Activity.Ir.Topics do
     tags
   end
 
+  defp hashtags_to_topics(%{data: %{"hashtags" => tags}}) do
+    Enum.map(tags, fn tag -> "hashtag:" <> tag end)
+  end
+
   defp hashtags_to_topics(%{data: %{"tag" => tags}}) do
     tags
     |> Enum.filter(&is_bitstring(&1))
diff --git a/lib/pleroma/constants.ex b/lib/pleroma/constants.ex
index cf8182d55..8f265715c 100644
--- a/lib/pleroma/constants.ex
+++ b/lib/pleroma/constants.ex
@@ -18,7 +18,8 @@ defmodule Pleroma.Constants do
       "emoji",
       "context_id",
       "deleted_activity_id",
-      "pleroma_internal"
+      "pleroma_internal",
+      "hashtags"
     ]
   )
 
diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex
index 1c91bc074..61c1043ed 100644
--- a/lib/pleroma/web/activity_pub/activity_pub.ex
+++ b/lib/pleroma/web/activity_pub/activity_pub.ex
@@ -666,7 +666,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
   defp restrict_tag_reject(query, %{tag_reject: [_ | _] = tag_reject}) do
     from(
       [_activity, object] in query,
-      where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject)
+      where: fragment("not (?)->'hashtags' \\?| (?)", object.data, ^tag_reject)
     )
   end
 
@@ -679,7 +679,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do defp restrict_tag_all(query, %{tag_all: [_ | _] = tag_all}) do
from( [_activity, object] in query, - where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all) + where: fragment("(?)->'hashtags' \\?& (?)", object.data, ^tag_all) ) end @@ -692,14 +692,14 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do defp restrict_tag(query, %{tag: tag}) when is_list(tag) do from( [_activity, object] in query, - where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag) + where: fragment("(?)->'hashtags' \\?| (?)", object.data, ^tag) ) end defp restrict_tag(query, %{tag: tag}) when is_binary(tag) do from( [_activity, object] in query, - where: fragment("(?)->'tag' \\? (?)", object.data, ^tag) + where: fragment("(?)->'hashtags' \\? (?)", object.data, ^tag) ) end diff --git a/lib/pleroma/web/activity_pub/mrf/simple_policy.ex b/lib/pleroma/web/activity_pub/mrf/simple_policy.ex index 6cd91826d..2fa7b3194 100644 --- a/lib/pleroma/web/activity_pub/mrf/simple_policy.ex +++ b/lib/pleroma/web/activity_pub/mrf/simple_policy.ex @@ -74,9 +74,11 @@ defmodule Pleroma.Web.ActivityPub.MRF.SimplePolicy do object = if MRF.subdomain_match?(media_nsfw, actor_host) do - tags = (child_object["tag"] || []) ++ ["nsfw"] - child_object = Map.put(child_object, "tag", tags) - child_object = Map.put(child_object, "sensitive", true) + child_object = + child_object + |> Map.put("hashtags", (child_object["hashtags"] || []) ++ ["nsfw"]) + |> Map.put("sensitive", true) + Map.put(object, "object", child_object) else object diff --git a/lib/pleroma/web/activity_pub/transmogrifier.ex b/lib/pleroma/web/activity_pub/transmogrifier.ex index 565d32433..d3dc637da 100644 --- a/lib/pleroma/web/activity_pub/transmogrifier.ex +++ b/lib/pleroma/web/activity_pub/transmogrifier.ex @@ -312,16 +312,15 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier do def fix_emoji(object), do: object def fix_tag(%{"tag" => tag} = object) when is_list(tag) do - tags = + hashtags = tag |> Enum.filter(fn data -> data["type"] == "Hashtag" and data["name"] end) - |> Enum.map(fn %{"name" => name} -> - 
name - |> String.slice(1..-1) - |> String.downcase() + |> Enum.map(fn + %{"name" => "#" <> hashtag} -> String.downcase(hashtag) + %{"name" => hashtag} -> String.downcase(hashtag) end) - Map.put(object, "tag", tag ++ tags) + Map.put(object, "hashtags", hashtags) end def fix_tag(%{"tag" => %{} = tag} = object) do @@ -865,7 +864,7 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier do def add_hashtags(object) do tags = - (object["tag"] || []) + ((object["hashtags"] || []) ++ (object["tag"] || [])) |> Enum.map(fn # Expand internal representation tags into AS2 tags. tag when is_binary(tag) -> @@ -936,7 +935,7 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier do end def set_sensitive(object) do - tags = object["tag"] || [] + tags = object["hashtags"] || object["tag"] || [] Map.put(object, "sensitive", "nsfw" in tags) end diff --git a/lib/pleroma/web/common_api/utils.ex b/lib/pleroma/web/common_api/utils.ex index 1c74ea787..880b5d78f 100644 --- a/lib/pleroma/web/common_api/utils.ex +++ b/lib/pleroma/web/common_api/utils.ex @@ -310,7 +310,16 @@ defmodule Pleroma.Web.CommonAPI.Utils do "context" => draft.context, "attachment" => draft.attachments, "actor" => draft.user.ap_id, - "tag" => Keyword.values(draft.tags) |> Enum.uniq() + "tag" => Enum.filter(draft.tags, &is_map(&1)) |> Enum.uniq(), + "hashtags" => + draft.tags + |> Enum.reduce([], fn + # Why so many formats + {:name, x}, acc -> if is_bitstring(x), do: [x | acc], else: acc + {"#" <> _, x}, acc -> if is_bitstring(x), do: [x | acc], else: acc + x, acc -> if is_bitstring(x), do: [x | acc], else: acc + end) + |> Enum.uniq() } |> add_in_reply_to(draft.in_reply_to) |> Map.merge(draft.extra) diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index 2301e21cf..6fc6272c2 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -347,7 +347,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do 
media_attachments: attachments, poll: render(PollView, "show.json", object: object, for: opts[:for]), mentions: mentions, - tags: build_tags(tags), + tags: build_tags(object.data["hashtags"] || tags), application: %{ name: "Web", website: nil diff --git a/lib/pleroma/web/templates/feed/feed/_activity.atom.eex b/lib/pleroma/web/templates/feed/feed/_activity.atom.eex index 3fd150c4e..12a9545f3 100644 --- a/lib/pleroma/web/templates/feed/feed/_activity.atom.eex +++ b/lib/pleroma/web/templates/feed/feed/_activity.atom.eex @@ -22,8 +22,8 @@ <% end %> - <%= for tag <- @data["tag"] || [] do %> - + <%= for hashtag <- @data["hashtags"] || [] do %> + <% end %> <%= for attachment <- @data["attachment"] || [] do %> diff --git a/lib/pleroma/web/templates/feed/feed/_activity.rss.eex b/lib/pleroma/web/templates/feed/feed/_activity.rss.eex index 42960de7d..00872b4b7 100644 --- a/lib/pleroma/web/templates/feed/feed/_activity.rss.eex +++ b/lib/pleroma/web/templates/feed/feed/_activity.rss.eex @@ -21,8 +21,8 @@ <%= @data["external_url"] %> <% end %> - <%= for tag <- @data["tag"] || [] do %> - + <%= for hashtag <- @data["hashtags"] || [] do %> + <% end %> <%= for attachment <- @data["attachment"] || [] do %> diff --git a/lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex b/lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex index cf5874a91..1377a6bbc 100644 --- a/lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex +++ b/lib/pleroma/web/templates/feed/feed/_tag_activity.atom.eex @@ -41,8 +41,8 @@ <% end %> <% end %> - <%= for tag <- @data["tag"] || [] do %> - + <%= for hashtag <- @data["hashtags"] || [] do %> + <% end %> <%= for {emoji, file} <- @data["emoji"] || %{} do %> diff --git a/priv/repo/migrations/20200731165800_add_hashtags_index_to_objects.exs b/priv/repo/migrations/20200731165800_add_hashtags_index_to_objects.exs new file mode 100644 index 000000000..b78682821 --- /dev/null +++ b/priv/repo/migrations/20200731165800_add_hashtags_index_to_objects.exs @@ 
-0,0 +1,11 @@ +defmodule Pleroma.Repo.Migrations.AddHashtagsIndexToObjects do + use Ecto.Migration + + def change do + drop_if_exists(index(:objects, ["(data->'tag')"], using: :gin, name: :objects_tags)) + + create_if_not_exists( + index(:objects, ["(data->'hashtags')"], using: :gin, name: :objects_hashtags) + ) + end +end diff --git a/test/pleroma/activity/ir/topics_test.exs b/test/pleroma/activity/ir/topics_test.exs index 5e5c2f8da..eb098ee95 100644 --- a/test/pleroma/activity/ir/topics_test.exs +++ b/test/pleroma/activity/ir/topics_test.exs @@ -78,7 +78,7 @@ defmodule Pleroma.Activity.Ir.TopicsTest do end test "converts tags to hash tags", %{activity: %{object: %{data: data} = object} = activity} do - tagged_data = Map.put(data, "tag", ["foo", "bar"]) + tagged_data = Map.put(data, "hashtags", ["foo", "bar"]) activity = %{activity | object: %{object | data: tagged_data}} topics = Topics.get_activity_topics(activity) diff --git a/test/pleroma/web/activity_pub/mrf/simple_policy_test.exs b/test/pleroma/web/activity_pub/mrf/simple_policy_test.exs index d7dde62c4..9777fcde1 100644 --- a/test/pleroma/web/activity_pub/mrf/simple_policy_test.exs +++ b/test/pleroma/web/activity_pub/mrf/simple_policy_test.exs @@ -78,7 +78,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.SimplePolicyTest do assert SimplePolicy.filter(media_message) == {:ok, media_message - |> put_in(["object", "tag"], ["foo", "nsfw"]) + |> put_in(["object", "hashtags"], ["foo", "nsfw"]) |> put_in(["object", "sensitive"], true)} assert SimplePolicy.filter(local_message) == {:ok, local_message} @@ -92,7 +92,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.SimplePolicyTest do assert SimplePolicy.filter(media_message) == {:ok, media_message - |> put_in(["object", "tag"], ["foo", "nsfw"]) + |> put_in(["object", "hashtags"], ["foo", "nsfw"]) |> put_in(["object", "sensitive"], true)} assert SimplePolicy.filter(local_message) == {:ok, local_message} @@ -105,7 +105,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.SimplePolicyTest do 
"type" => "Create", "object" => %{ "attachment" => [%{}], - "tag" => ["foo"], + "hashtags" => ["foo"], "sensitive" => false } } diff --git a/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs b/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs index b4a006aec..528636f04 100644 --- a/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs +++ b/test/pleroma/web/activity_pub/transmogrifier/note_handling_test.exs @@ -39,7 +39,7 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier.NoteHandlingTest do {:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data) object = Object.normalize(data["object"]) - assert "test" in object.data["tag"] + assert ["test"] == object.data["hashtags"] end test "it cleans up incoming notices which are not really DMs" do @@ -220,7 +220,7 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier.NoteHandlingTest do {:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data) object = Object.normalize(data["object"]) - assert Enum.at(object.data["tag"], 2) == "moo" + assert object.data["hashtags"] == ["moo"] end test "it works for incoming notices with contentMap" do diff --git a/test/pleroma/web/activity_pub/transmogrifier_test.exs b/test/pleroma/web/activity_pub/transmogrifier_test.exs index 66ea7664a..d0bd00b58 100644 --- a/test/pleroma/web/activity_pub/transmogrifier_test.exs +++ b/test/pleroma/web/activity_pub/transmogrifier_test.exs @@ -204,30 +204,37 @@ defmodule Pleroma.Web.ActivityPub.TransmogrifierTest do {:ok, activity} = CommonAPI.post(user, %{status: "#2hu :firefox:"}) - {:ok, modified} = Transmogrifier.prepare_outgoing(activity.data) + {:ok, %{"object" => modified_object}} = Transmogrifier.prepare_outgoing(activity.data) - assert length(modified["object"]["tag"]) == 2 + assert [ + %{"name" => "#2hu", "type" => "Hashtag"}, + %{"name" => ":firefox:", "type" => "Emoji"} + ] = modified_object["tag"] - assert is_nil(modified["object"]["emoji"]) - 
assert is_nil(modified["object"]["like_count"]) - assert is_nil(modified["object"]["announcements"]) - assert is_nil(modified["object"]["announcement_count"]) - assert is_nil(modified["object"]["context_id"]) + refute Map.has_key?(modified_object, "hashtags") + refute Map.has_key?(modified_object, "emoji") + refute Map.has_key?(modified_object, "like_count") + refute Map.has_key?(modified_object, "announcements") + refute Map.has_key?(modified_object, "announcement_count") + refute Map.has_key?(modified_object, "context_id") end test "it strips internal fields of article" do activity = insert(:article_activity) - {:ok, modified} = Transmogrifier.prepare_outgoing(activity.data) + {:ok, %{"object" => modified_object}} = Transmogrifier.prepare_outgoing(activity.data) - assert length(modified["object"]["tag"]) == 2 + assert [ + %{"name" => "#2hu", "type" => "Hashtag"}, + %{"name" => ":2hu:", "type" => "Emoji"} + ] = modified_object["tag"] - assert is_nil(modified["object"]["emoji"]) - assert is_nil(modified["object"]["like_count"]) - assert is_nil(modified["object"]["announcements"]) - assert is_nil(modified["object"]["announcement_count"]) - assert is_nil(modified["object"]["context_id"]) - assert is_nil(modified["object"]["likes"]) + refute Map.has_key?(modified_object, "hashtags") + refute Map.has_key?(modified_object, "emoji") + refute Map.has_key?(modified_object, "like_count") + refute Map.has_key?(modified_object, "announcements") + refute Map.has_key?(modified_object, "announcement_count") + refute Map.has_key?(modified_object, "context_id") end test "the directMessage flag is present" do diff --git a/test/pleroma/web/common_api/utils_test.exs b/test/pleroma/web/common_api/utils_test.exs index 4d6c9ea26..211042192 100644 --- a/test/pleroma/web/common_api/utils_test.exs +++ b/test/pleroma/web/common_api/utils_test.exs @@ -591,7 +591,8 @@ defmodule Pleroma.Web.CommonAPI.UtilsTest do "context" => "2hu", "sensitive" => false, "summary" => "test summary", - "tag" => 
["jimm"], + "hashtags" => ["jimm"], + "tag" => [], "to" => [user2.ap_id], "type" => "Note", "custom_tag" => "test" diff --git a/test/pleroma/web/common_api_test.exs b/test/pleroma/web/common_api_test.exs index 585b2c174..3b7ac2033 100644 --- a/test/pleroma/web/common_api_test.exs +++ b/test/pleroma/web/common_api_test.exs @@ -493,7 +493,8 @@ defmodule Pleroma.Web.CommonAPITest do object = Object.normalize(activity) - assert object.data["tag"] == ["2hu"] + assert object.data["tag"] == [] + assert object.data["hashtags"] == ["2hu"] end test "it adds emoji in the object" do diff --git a/test/pleroma/web/mastodon_api/views/status_view_test.exs b/test/pleroma/web/mastodon_api/views/status_view_test.exs index f2a7469ed..ecce26261 100644 --- a/test/pleroma/web/mastodon_api/views/status_view_test.exs +++ b/test/pleroma/web/mastodon_api/views/status_view_test.exs @@ -262,8 +262,8 @@ defmodule Pleroma.Web.MastodonAPI.StatusViewTest do mentions: [], tags: [ %{ - name: "#{object_data["tag"]}", - url: "/tag/#{object_data["tag"]}" + name: "2hu", + url: "/tag/2hu" } ], application: %{ diff --git a/test/support/factory.ex b/test/support/factory.ex index 8eb07dc3c..a709d0dae 100644 --- a/test/support/factory.ex +++ b/test/support/factory.ex @@ -93,7 +93,7 @@ defmodule Pleroma.Factory do "like_count" => 0, "context" => "2hu", "summary" => "2hu", - "tag" => ["2hu"], + "hashtags" => ["2hu"], "emoji" => %{ "2hu" => "corndog.png" }