nitter/src/formatters.nim

import strutils, strformat, sequtils, htmlgen, xmltree, times, uri
import regex

import types, utils

from unicode import Rune, `$`

const
  # Patterns used by the linkification and clean-up procs in this module.
  # Literal dots are escaped and letter classes are spelled `A-Za-z`: the
  # ASCII range `A-z` also contains `[`, `\`, `]`, `^`, `_` and a backtick.

  # http(s)/ftp URL, captured as a whole in group 0.
  urlRegex = re"((https?|ftp)://(-\.)?([^\s/?\.#]+\.?)+([/\?][^\s\)]*)?)"
  # E-mail address, captured as a whole in group 0.
  emailRegex = re"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"
  # Group 0: the character preceding the mention (empty at start of text);
  # group 1: the user name without the leading `@`.
  usernameRegex = re"(^|[^A-Za-z0-9_?\/])@([A-Za-z0-9_]+)"
  # pic.twitter.com link up to the next space.
  picRegex = re"pic\.twitter\.com/[^ ]+"
  # Ellipsis character together with an optional preceding space.
  ellipsisRegex = re" ?…"
  # Group 0: leading whitespace (or start of text); group 1: `#tag`/`$tag`.
  hashtagRegex = re"([^\S]|^)([#$]\w+)"
  # YouTube and Twitter host names, optionally with a www./m./mobile. prefix.
  ytRegex = re"(www\.|m\.)?youtu(be\.com|\.be)"
  twRegex = re"(www\.|mobile\.)?twitter\.com"
  # Non-breaking space (U+00A0) as a string.
  nbsp = $Rune(0x000A0)

# Host name used when absolute links have to be generated (see
# `reUsernameToFullLink`). Declared with `strdefine`, so the default below can
# be overridden at compile time via `-d:hostname=...`.
const hostname {.strdefine.} = "nitter.net"

proc stripText*(text: string): string =
  ## Returns `text` with every non-breaking space turned into a regular
  ## space and leading/trailing whitespace removed.
  let spaced = replace(text, nbsp, " ")
  result = strip(spaced)

proc shortLink*(text: string; length=28): string =
  ## Returns a display form of a link: every `http://` / `https://` scheme
  ## (plus an optional `www.`) is removed from `text`, and the remainder is
  ## cut to at most `length` bytes followed by "…" when it is longer.
  ##
  ## `length` is measured in bytes; a negative value is treated as 0.
  # The dot is escaped so that only a literal "www." is dropped (previously
  # e.g. "www2" was swallowed as well).
  result = text.replace(re"https?://(www\.)?", "")
  if result.len > length:
    # Step back to the start of a UTF-8 sequence so the cut never leaves a
    # partial multi-byte character in front of the ellipsis.
    var cut = max(length, 0)
    while cut > 0 and (ord(result[cut]) and 0xC0) == 0x80:
      dec cut
    result = result[0 ..< cut] & "…"

proc toLink*(url, text: string): string =
  ## Builds an HTML anchor element whose `href` is `url` and whose content
  ## is `text`. Both values are handed to `htmlgen.a` as given.
  result = a(href = url, text)

proc reUrlToShortLink*(m: RegexMatch; s: string): string =
  ## Regex replacement callback: takes the first capture of group 0 in `s`
  ## as the URL and returns an anchor to it whose visible text is the
  ## shortened form produced by `shortLink`.
  let matched = s[m.group(0)[0]]
  result = matched.toLink(shortLink(matched))

proc reUrlToLink*(m: RegexMatch; s: string): string =
  ## Regex replacement callback: takes the first capture of group 0 in `s`
  ## as the URL and returns an anchor to it. The visible text is the URL
  ## without its `http(s)://` scheme and optional `www.` prefix, untruncated.
  let url = s[m.group(0)[0]]
  # Escaped dot: only a literal "www." is stripped from the label, so hosts
  # such as "www2.example.com" keep their full name.
  toLink(url, url.replace(re"https?://(www\.)?", ""))

proc reEmailToLink*(m: RegexMatch; s: string): string =
  ## Regex replacement callback: takes the first capture of group 0 in `s`
  ## as an e-mail address and returns a `mailto:` anchor showing the address.
  let address = s[m.group(0)[0]]
  # A mailto URI has no authority component: the correct form is
  # `mailto:user@host`, not `mailto://user@host` (RFC 6068).
  toLink("mailto:" & address, address)

proc reHashtagToLink*(m: RegexMatch; s: string): string =
  ## Regex replacement callback for hashtag/cashtag matches.
  ##
  ## Group 0 is the text captured in front of the tag (kept as-is when
  ## present), group 1 is the tag itself. The tag becomes a `/search?q=`
  ## link only when it contains at least one ASCII letter; otherwise it is
  ## emitted unchanged.
  let
    lead = m.group(0)
    tag = s[m.group(1)[0]]

  if lead.len > 0:
    result = s[lead[0]]

  if any(tag, isAlphaAscii):
    result.add toLink("/search?q=" & encodeUrl(tag), tag)
  else:
    result.add tag

proc reUsernameToLink*(m: RegexMatch; s: string): string =
  ## Regex replacement callback for `@mention` matches.
  ##
  ## Group 0 is the text captured in front of the `@` (kept as-is when
  ## present), group 1 is the user name. Returns that leading text followed
  ## by an anchor to `/<name>` labelled `@<name>`.
  let
    lead = m.group(0)
    name = s[m.group(1)[0]]

  if lead.len > 0:
    result = s[lead[0]]

  result.add toLink("/" & name, "@" & name)

proc reUsernameToFullLink*(m: RegexMatch; s: string): string =
  ## Same as `reUsernameToLink`, but the relative `href="/..."` it produces
  ## is rewritten into an absolute `https://<hostname>/...` URL.
  let absolutePrefix = &"href=\"https://{hostname}/"
  reUsernameToLink(m, s).replace("href=\"/", absolutePrefix)

proc replaceUrl*(url: string; prefs: Prefs): string =
  ## Substitutes YouTube and Twitter host names found in `url` with the
  ## replacements stored in `prefs`. A replacement that is empty leaves the
  ## corresponding host untouched.
  let
    youtubeHost = prefs.replaceYouTube
    twitterHost = prefs.replaceTwitter

  result = url
  if youtubeHost.len != 0:
    result = result.replace(ytRegex, youtubeHost)
  if twitterHost.len != 0:
    result = result.replace(twRegex, twitterHost)

proc linkifyText*(text: string; prefs: Prefs; rss=false): string =
  ## Converts `text` into HTML in which e-mail addresses, URLs, `@mentions`
  ## and hashtags are wrapped in anchors.
  ##
  ## * `prefs` supplies the host replacements applied by `replaceUrl`.
  ## * `rss` selects the RSS flavour: URL labels are not truncated and
  ##   mention links are absolute instead of relative.
  ##
  ## The passes below run on the output of the previous one, so their order
  ## matters.
  # Escape the stripped text before any markup is inserted.
  result = xmltree.escape(stripText(text))
  # Ellipsis characters (and a preceding space) collapse to a single space.
  result = result.replace(ellipsisRegex, " ")
  result = result.replace(emailRegex, reEmailToLink)
  if rss:
    result = result.replace(urlRegex, reUrlToLink)
    result = result.replace(usernameRegex, reUsernameToFullLink)
  else:
    result = result.replace(urlRegex, reUrlToShortLink)
    result = result.replace(usernameRegex, reUsernameToLink)
  result = result.replace(hashtagRegex, reHashtagToLink)
  # Insert a space before an anchor that directly follows a character other
  # than whitespace, "(" or "%".
  result = result.replace(re"([^\s\(\n%])<a", "$1 <a")
  # Drop whitespace between a closing anchor and trailing punctuation.
  result = result.replace(re"</a>\s+([;.,!\)'%]|&apos;)", "</a>$1")
  # Remove the space again when the text starts with ". <a".
  result = result.replace(re"^\. <a", ".<a")
  # Host replacement runs last, over the complete markup.
  result = result.replaceUrl(prefs)

proc stripTwitterUrls*(text: string): string =
  ## Removes `pic.twitter.com/...` links from `text`, then removes ellipsis
  ## characters (together with a preceding space).
  text.replace(picRegex, "").replace(ellipsisRegex, "")

proc proxifyVideo*(manifest: string; proxy: bool): string =
  ## Rewrites the media paths inside a video `manifest`.
  ##
  ## Every run of characters ending in `.ts`, `.m3u8` or `.vmap` is prefixed
  ## with `https://video.twimg.com`; when `proxy` is true the resulting URL
  ## is additionally passed through `getVidUrl`.
  proc cb(m: RegexMatch; s: string): string =
    result = "https://video.twimg.com" & s[m.group(0)[0]]
    if proxy: result = getVidUrl(result)
  # The dots are escaped so only real file extensions terminate a match;
  # unescaped, `.ts` would also accept any character followed by "ts".
  result = manifest.replace(re"(.+(\.ts|\.m3u8|\.vmap))", cb)

proc getUserpic*(userpic: string; style=""): string =
  ## Returns the profile picture URL with its size suffix (`_normal`,
  ## `_bigger`, `_mini`, `_200x200`, `_400x400`) removed and `style`
  ## inserted right before the file extension.
  result = userpic.replace(
    re"_(normal|bigger|mini|200x200|400x400)(\.[A-z]+)$", "$2")
  result = result.replace(re"(\.[A-z]+)$", style & "$1")

proc getUserpic*(profile: Profile; style=""): string =
  ## Convenience overload: applies `getUserpic` to `profile.userpic`.
  result = profile.userpic.getUserpic(style)

proc getVideoEmbed*(id: string): string =
  ## Returns the twitter.com embed URL for the video with the given `id`.
  result = fmt"https://twitter.com/i/videos/{id}?embed_source=facebook"

proc pageTitle*(profile: Profile): string =
  ## Page title for a profile: the full name followed by `(@username)`.
  result = fmt"{profile.fullname} (@{profile.username})"

proc pageDesc*(profile: Profile): string =
  ## Page description for a profile, built from its full name.
  result = "The latest tweets from "
  result.add profile.fullname

proc getJoinDate*(profile: Profile): string =
  ## Formats the join date as the word "Joined" followed by the full month
  ## name and the year.
  const layout = "'Joined' MMMM YYYY"
  result = format(profile.joinDate, layout)

proc getJoinDateFull*(profile: Profile): string =
  ## Formats the join date with 12-hour time, AM/PM marker, day, abbreviated
  ## month name and year.
  const layout = "h:mm tt - d MMM YYYY"
  result = format(profile.joinDate, layout)

proc getTime*(tweet: Tweet): string =
  ## Formats the tweet time as `day/month/year, hours:minutes:seconds`
  ## using a 24-hour clock.
  const layout = "d/M/yyyy', 'HH:mm:ss"
  result = format(tweet.time, layout)

proc getRfc822Time*(tweet: Tweet): string =
  ## Formats the tweet time as an RFC 822 style date for RSS output, e.g.
  ## `Tue, 8 Oct 2019 11:28:57 GMT`.
  # The pattern ends in a literal "GMT", so the value has to be expressed in
  # UTC before formatting; otherwise the label would be wrong whenever the
  # time is rendered in another zone.
  # NOTE(review): the type of `tweet.time` is declared elsewhere; `utc` is
  # provided by `times` for both `Time` and `DateTime`.
  utc(tweet.time).format("ddd', 'd MMM yyyy HH:mm:ss 'GMT'")

proc getTweetTime*(tweet: Tweet): string =
  ## Formats the tweet time as 12-hour time with AM/PM marker, a middle dot,
  ## then abbreviated month, day and year.
  const layout = "h:mm tt' · 'MMM d', 'YYYY"
  result = format(tweet.time, layout)

proc getLink*(tweet: Tweet | Quote): string =
  ## Returns the relative status link `/<username>/status/<id>` for a tweet
  ## or quote, or an empty string when its id is empty.
  if tweet.id.len > 0:
    result = &"/{tweet.profile.username}/status/{tweet.id}"

proc getTombstone*(text: string): string =
  ## Cleans up a tombstone message: removes the "Learn more" suffix (with
  ## any newlines/spaces in front of it), then trims the remaining text,
  ## including spaces and newlines at both ends.
  let withoutNotice = text.replace(re"\n* *Learn more", "")
  result = stripText(withoutNotice).strip(chars = {' ', '\n'})
Linkify hashtags Fixes #34 2019-09-19 05:22:45 +02:00			`import strutils, strformat, sequtils, htmlgen, xmltree, times, uri`
Initial commit 2019-06-20 16:16:20 +02:00			`import regex`

Add video proxy support 2019-08-19 20:53:47 +02:00			`import types, utils`
Initial commit 2019-06-20 16:16:20 +02:00
Add simple emoji support 2019-06-25 02:38:18 +02:00			from unicode import Rune, `$`

Initial commit 2019-06-20 16:16:20 +02:00			`const`
Fix url regex 2019-06-27 22:30:00 +02:00			`urlRegex = re"((https?\|ftp)://(-\.)?([^\s/?\.#]+\.?)+([/\?][^\s\)]*)?)"`
Initial commit 2019-06-20 16:16:20 +02:00			`emailRegex = re"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"`
Fix username linkification bug 2019-09-20 01:46:20 +02:00			`usernameRegex = re"(^\|[^A-z0-9_?\/])@([A-z0-9_]+)"`
Initial commit 2019-06-20 16:16:20 +02:00			`picRegex = re"pic.twitter.com/[^ ]+"`
			`ellipsisRegex = re" ?…"`
Improve hashtag regex Fixes #47 2019-09-27 00:53:11 +02:00			`hashtagRegex = re"([^\S]\|^)([#$]\w+)"`
Improve youtube/twitter regex 2019-09-14 08:56:46 +02:00			`ytRegex = re"(www.\|m.)?youtu(be.com\|.be)"`
			`twRegex = re"(www.\|mobile.)?twitter.com"`
Add simple emoji support 2019-06-25 02:38:18 +02:00			`nbsp = $Rune(0x000A0)`

Add timeline RSS support 2019-09-15 09:57:45 +02:00			`const hostname {.strdefine.} = "nitter.net"`

Add simple emoji support 2019-06-25 02:38:18 +02:00			`proc stripText*(text: string): string =`
			`text.replace(nbsp, " ").strip()`
Initial commit 2019-06-20 16:16:20 +02:00
			`proc shortLink*(text: string; length=28): string =`
			`result = text.replace(re"https?://(www.)?", "")`
			`if result.len > length:`
			`result = result[0 ..< length] & "…"`

Add timeline RSS support 2019-09-15 09:57:45 +02:00			`proc toLink*(url, text: string): string =`
			`a(text, href=url)`
Initial commit 2019-06-20 16:16:20 +02:00
Add timeline RSS support 2019-09-15 09:57:45 +02:00			`proc reUrlToShortLink*(m: RegexMatch; s: string): string =`
Initial commit 2019-06-20 16:16:20 +02:00			`let url = s[m.group(0)[0]]`
Render tweet quotes 2019-06-24 08:07:36 +02:00			`toLink(url, shortLink(url))`
Initial commit 2019-06-20 16:16:20 +02:00
Add timeline RSS support 2019-09-15 09:57:45 +02:00			`proc reUrlToLink*(m: RegexMatch; s: string): string =`
			`let url = s[m.group(0)[0]]`
			`toLink(url, url.replace(re"https?://(www.)?", ""))`

Initial commit 2019-06-20 16:16:20 +02:00			`proc reEmailToLink*(m: RegexMatch; s: string): string =`
			`let url = s[m.group(0)[0]]`
			`toLink("mailto://" & url, url)`

Linkify hashtags Fixes #34 2019-09-19 05:22:45 +02:00			`proc reHashtagToLink*(m: RegexMatch; s: string): string =`
			`result = if m.group(0).len > 0: s[m.group(0)[0]] else: ""`
			`let hash = s[m.group(1)[0]]`
Fix hashtag linkifying 2019-10-08 13:16:48 +02:00			`let link = toLink("/search?q=" & encodeUrl(hash), hash)`
Linkify hashtags Fixes #34 2019-09-19 05:22:45 +02:00			`if hash.any(isAlphaAscii):`
			`result &= link`
			`else:`
			`result &= hash`

Initial commit 2019-06-20 16:16:20 +02:00			`proc reUsernameToLink*(m: RegexMatch; s: string): string =`
Cleaner image code 2019-06-25 03:48:57 +02:00			`var username = ""`
			`var pretext = ""`
Initial commit 2019-06-20 16:16:20 +02:00
Cleaner image code 2019-06-25 03:48:57 +02:00			`let pre = m.group(0)`
			`let match = m.group(1)`
Initial commit 2019-06-20 16:16:20 +02:00
			`username = s[match[0]]`

			`if pre.len > 0:`
			`pretext = s[pre[0]]`

			`pretext & toLink("/" & username, "@" & username)`

Add timeline RSS support 2019-09-15 09:57:45 +02:00			`proc reUsernameToFullLink*(m: RegexMatch; s: string): string =`
			`result = reUsernameToLink(m, s)`
			`result = result.replace("href=\"/", &"href=\"https://{hostname}/")`
Add Invidious/Nitter link replacement preferences 2019-08-15 15:51:20 +02:00
			`proc replaceUrl*(url: string; prefs: Prefs): string =`
Fix empty link replacement 2019-08-15 18:45:56 +02:00			`result = url`
Add Invidious/Nitter link replacement preferences 2019-08-15 15:51:20 +02:00			`if prefs.replaceYouTube.len > 0:`
Include 'www.' in twitter/youtube link replacement 2019-08-15 19:19:21 +02:00			`result = result.replace(ytRegex, prefs.replaceYouTube)`
Add Invidious/Nitter link replacement preferences 2019-08-15 15:51:20 +02:00			`if prefs.replaceTwitter.len > 0:`
Include 'www.' in twitter/youtube link replacement 2019-08-15 19:19:21 +02:00			`result = result.replace(twRegex, prefs.replaceTwitter)`
Initial commit 2019-06-20 16:16:20 +02:00
Add timeline RSS support 2019-09-15 09:57:45 +02:00			`proc linkifyText*(text: string; prefs: Prefs; rss=false): string =`
			`result = xmltree.escape(stripText(text))`
Fix username linkification bug 2019-09-20 01:46:20 +02:00			`result = result.replace(ellipsisRegex, " ")`
Add timeline RSS support 2019-09-15 09:57:45 +02:00			`result = result.replace(emailRegex, reEmailToLink)`
			`if rss:`
			`result = result.replace(urlRegex, reUrlToLink)`
			`result = result.replace(usernameRegex, reUsernameToFullLink)`
			`else:`
			`result = result.replace(urlRegex, reUrlToShortLink)`
			`result = result.replace(usernameRegex, reUsernameToLink)`
Fix username linkification bug 2019-09-20 01:46:20 +02:00			`result = result.replace(hashtagRegex, reHashtagToLink)`
Add timeline RSS support 2019-09-15 09:57:45 +02:00			`result = result.replace(re"([^\s\(\n%])<a", "$1 <a")`
			`result = result.replace(re"</a>\s+([;.,!\)'%]\|')", "</a>$1")`
			`result = result.replace(re"^\. <a", ".<a")`
			`result = result.replaceUrl(prefs)`

Initial commit 2019-06-20 16:16:20 +02:00			`proc stripTwitterUrls*(text: string): string =`
			`result = text`
			`result = result.replace(picRegex, "")`
			`result = result.replace(ellipsisRegex, "")`

Add video proxy support 2019-08-19 20:53:47 +02:00			`proc proxifyVideo*(manifest: string; proxy: bool): string =`
			`proc cb(m: RegexMatch; s: string): string =`
			`result = "https://video.twimg.com" & s[m.group(0)[0]]`
Restrict image/gif media host instead of hashing 2019-09-13 12:27:04 +02:00			`if proxy: result = getVidUrl(result)`
Add video proxy support 2019-08-19 20:53:47 +02:00			`result = manifest.replace(re"(.+(.ts\|.m3u8\|.vmap))", cb)`

Initial commit 2019-06-20 16:16:20 +02:00			`proc getUserpic*(userpic: string; style=""): string =`
Revamp profile api to display more metadata 2019-08-11 21:26:55 +02:00			`let pic = userpic.replace(re"_(normal\|bigger\|mini\|200x200\|400x400)(\.[A-z]+)$", "$2")`
Initial commit 2019-06-20 16:16:20 +02:00			`pic.replace(re"(\.[A-z]+)$", style & "$1")`

			`proc getUserpic*(profile: Profile; style=""): string =`
			`getUserPic(profile.userpic, style)`

Support gif link previews 2019-08-07 22:27:24 +02:00			`proc getVideoEmbed*(id: string): string =`
			`&"https://twitter.com/i/videos/{id}?embed_source=facebook"`
Implement link previews 2019-08-07 22:02:19 +02:00
Add dynamic page title 2019-06-24 22:40:48 +02:00			`proc pageTitle*(profile: Profile): string =`
Add server config file 2019-07-31 02:15:43 +02:00			`&"{profile.fullname} (@{profile.username})"`
Ensure correct text formatting 2019-06-25 04:52:38 +02:00
Implement link previews 2019-08-07 22:02:19 +02:00			`proc pageDesc*(profile: Profile): string =`
			`"The latest tweets from " & profile.fullname`

Revamp profile api to display more metadata 2019-08-11 21:26:55 +02:00			`proc getJoinDate*(profile: Profile): string =`
			`profile.joinDate.format("'Joined' MMMM YYYY")`

			`proc getJoinDateFull*(profile: Profile): string =`
			`profile.joinDate.format("h:mm tt - d MMM YYYY")`

Ensure correct text formatting 2019-06-25 04:52:38 +02:00			`proc getTime*(tweet: Tweet): string =`
Improve RSS validity 2019-09-15 11:14:03 +02:00			`tweet.time.format("d/M/yyyy', 'HH:mm:ss")`

			`proc getRfc822Time*(tweet: Tweet): string =`
			`tweet.time.format("ddd', 'd MMM yyyy HH:mm:ss 'GMT'")`
Generate tweet links 2019-07-01 23:14:36 +02:00
Show published date under main tweets Fixes #59 2019-10-08 13:28:57 +02:00			`proc getTweetTime*(tweet: Tweet): string =`
			`tweet.time.format("h:mm tt' · 'MMM d', 'YYYY")`

Generate tweet links 2019-07-01 23:14:36 +02:00			`proc getLink*(tweet: Tweet \| Quote): string =`
Fix "more replies" link for unavailable tweets 2019-09-24 16:01:09 +02:00			`if tweet.id.len == 0: return`
Fix getLink typo 2019-07-01 23:55:19 +02:00			`&"/{tweet.profile.username}/status/{tweet.id}"`
Show reasons for tweets being withheld Fixes #33 2019-09-08 14:34:26 +02:00
			`proc getTombstone*(text: string): string =`
			`text.replace(re"\n* *Learn more", "").stripText().strip(chars={' ', '\n'})`