diff --git a/src/parserutils.nim b/src/parserutils.nim index 7cf696e..6b8263f 100644 --- a/src/parserutils.nim +++ b/src/parserutils.nim @@ -1,9 +1,17 @@ # SPDX-License-Identifier: AGPL-3.0-only -import std/[strutils, times, macros, htmlgen, options, algorithm, re] +import std/[times, macros, htmlgen, options, algorithm, re] +import std/strutils except escape import std/unicode except strip +from xmltree import escape import packedjson import types, utils, formatters +const + unicodeOpen = "\uFFFA" + unicodeClose = "\uFFFB" + xmlOpen = escape("<") + xmlClose = escape(">") + let unRegex = re"(^|[^A-z0-9-_./?])@([A-z0-9_]{1,15})" unReplace = "$1@$2" @@ -304,7 +312,9 @@ proc expandTweetEntities*(tweet: Tweet; js: JsonNode) = proc expandNoteTweetEntities*(tweet: Tweet; js: JsonNode) = let entities = ? js{"entity_set"} - text = js{"text"}.getStr + text = js{"text"}.getStr.multiReplace(("<", unicodeOpen), (">", unicodeClose)) textSlice = 0..text.runeLen tweet.expandTextEntities(entities, text, textSlice) + + tweet.text = tweet.text.multiReplace((unicodeOpen, xmlOpen), (unicodeClose, xmlClose)) diff --git a/tests/test_quote.py b/tests/test_quote.py index 1b458ea..4921c21 100644 --- a/tests/test_quote.py +++ b/tests/test_quote.py @@ -9,7 +9,7 @@ text = [ What are we doing wrong? reuters.com/article/us-norwa…"""], ['nim_lang/status/1491461266849808397#m', - 'Nim language', '@nim_lang', + 'Nim', '@nim_lang', """What's better than Nim 1.6.0? Nim 1.6.2 :) diff --git a/tests/test_tweet.py b/tests/test_tweet.py index 7a3c4ed..ac89782 100644 --- a/tests/test_tweet.py +++ b/tests/test_tweet.py @@ -35,7 +35,16 @@ multiline = [ CALM AND CLICHÉ - ON"""] + ON"""], + [1718660434457239868, 'WebDesignMuseum', + """ +Happy 32nd Birthday HTML tags! + +On October 29, 1991, the internet pioneer, Tim Berners-Lee, published a document entitled HTML Tags. + +The document contained a description of the first 18 HTML tags: , <nextid>, <a>, <isindex>, <plaintext>, <listing>, <p>, <h1>…<h6>, <address>, <hp1>, <hp2>…, <dl>, <dt>, <dd>, <ul>, <li>,<menu> and <dir>. The design of the first version of HTML language was influenced by the SGML universal markup language. + +#WebDesignHistory"""] ] link = [