From b99f5d61834ffd86f9e8aeca2b00c704f0a0467e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9l=C3=A8ne?= Date: Tue, 26 Jul 2022 01:38:59 +0200 Subject: [PATCH] Emoji: split qualification variation into a module --- lib/pleroma/emoji.ex | 35 ++------------------------ lib/pleroma/emoji/combinations.ex | 41 +++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 33 deletions(-) create mode 100644 lib/pleroma/emoji/combinations.ex diff --git a/lib/pleroma/emoji.ex b/lib/pleroma/emoji.ex index 3726ef185..dd65d56ae 100644 --- a/lib/pleroma/emoji.ex +++ b/lib/pleroma/emoji.ex @@ -9,6 +9,7 @@ defmodule Pleroma.Emoji do """ use GenServer + alias Pleroma.Emoji.Combinations alias Pleroma.Emoji.Loader require Logger @@ -138,42 +139,10 @@ defmodule Pleroma.Emoji do def is_unicode_emoji?(_), do: false - # FE0F is the emoji variation sequence. It is used for fully-qualifying - # emoji, and that includes emoji combinations. - # This code generates combinations per emoji: for each FE0F, all possible - # combinations of the character being removed or staying will be generated. - # This is made as an attempt to find all partially-qualified and unqualified - # versions of a fully-qualified emoji. - # I have found *no cases* for which this would be a problem, after browsing - # the entire emoji list in emoji-test.txt. This is safe, and, sadly, most - # likely sane too. 
emoji_qualification_map = emojis |> Enum.filter(&String.contains?(&1, "\uFE0F")) - |> Enum.map(fn emoji -> - combinate = fn x, combinate -> - case x do - [] -> - [[]] - - ["\uFE0F" | tail] -> - combinate.(tail, combinate) - |> Enum.flat_map(fn x -> [x, ["\uFE0F" | x]] end) - - [codepoint | tail] -> - combinate.(tail, combinate) - |> Enum.map(fn x -> [codepoint | x] end) - end - end - - unqualified_list = - emoji - |> String.codepoints() - |> combinate.(combinate) - |> Enum.map(&List.to_string/1) - - {emoji, unqualified_list} - end) + |> Combinations.variate_emoji_qualification() for {qualified, unqualified_list} <- emoji_qualification_map do for unqualified <- unqualified_list do diff --git a/lib/pleroma/emoji/combinations.ex b/lib/pleroma/emoji/combinations.ex new file mode 100644 index 000000000..c49466406 --- /dev/null +++ b/lib/pleroma/emoji/combinations.ex @@ -0,0 +1,41 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Emoji.Combinations do + # FE0F is the emoji variation sequence. It is used for fully-qualifying + # emoji, and that includes emoji combinations. + # This code generates combinations per emoji: for each FE0F, all possible + # combinations of the character being removed or staying will be generated. + # This is made as an attempt to find all partially-qualified and unqualified + # versions of a fully-qualified emoji. + # I have found *no cases* for which this would be a problem, after browsing + # the entire emoji list in emoji-test.txt. This is safe, and, sadly, most + # likely sane too. 
+
+  # Expands a codepoint list into every variant that keeps or drops each
+  # U+FE0F, preserving the order of the remaining codepoints.
+  defp qualification_combinations([]), do: [[]]
+
+  defp qualification_combinations([head | rest]) do
+    for suffix <- qualification_combinations(rest),
+        variant <- expand_head(head, suffix),
+        do: variant
+  end
+
+  # U+FE0F is optional: emit the suffix without it first, then with it.
+  defp expand_head("\uFE0F", suffix), do: [suffix, ["\uFE0F" | suffix]]
+  defp expand_head(codepoint, suffix), do: [[codepoint | suffix]]
+
+  # Binary clause: every qualification variant of one emoji, as strings.
+  def variate_emoji_qualification(emoji) when is_binary(emoji) do
+    codepoints = String.codepoints(emoji)
+    variants = qualification_combinations(codepoints)
+    Enum.map(variants, &List.to_string/1)
+  end
+
+  # List clause: pairs each emoji with its variants as `{emoji, variants}`.
+  def variate_emoji_qualification(emoji) when is_list(emoji) do
+    for single <- emoji, do: {single, variate_emoji_qualification(single)}
+  end
+end