# nitter/src/formatters.nim

import strutils, strformat, times, uri, tables
import xmltree, htmlparser
import regex

import types, utils, query

from unicode import Rune, `$`

const
  # Host patterns and literals rewritten by `replaceUrl`.
  # Character classes are spelled `[A-Za-z]`: the range `[A-z]` also covers
  # the punctuation that sits between 'Z' and 'a' in ASCII
  # ('[', '\', ']', '^', '_' and the backtick).
  ytRegex = re"([A-Za-z.]+\.)?youtu(be\.com|\.be)"
  twRegex = re"(www\.|mobile\.)?twitter\.com"
  # The dot before `com` is escaped so it only matches a literal '.'.
  igRegex = re"(www\.)?instagram\.com"
  cards = "cards.twitter.com/cards"
  tco = "https://t.co"
  # U+00A0 (no-break space) as a string, replaced by `stripText`.
  nbsp = $Rune(0x000A0)

  # Scheme plus optional `www`/`wwwN` prefix, removed by `shortLink`.
  wwwRegex = re"https?://(www[0-9]?\.)?"
  # Text ending in one of the listed extensions; the dots are escaped so that
  # e.g. "xts" is not mistaken for ".ts". Used by `proxifyVideo`.
  manifestRegex = re"(.+(\.ts|\.m3u8|\.vmap))"
  # Trailing size suffix followed by the file extension (group 2).
  userpicRegex = re"_(normal|bigger|mini|200x200|400x400)(\.[A-Za-z]+)$"
  # Trailing file extension (group 1).
  extRegex = re"(\.[A-Za-z]+)$"
  # Optional newlines and spaces followed by "Learn more".
  tombstoneRegex = re"\n* *Learn more"

proc stripText*(text: string): string =
  ## Replaces every no-break space in `text` with a plain space, then trims
  ## leading and trailing whitespace.
  let spaced = replace(text, nbsp, " ")
  result = strip(spaced)

proc stripHtml*(text: string): string =
  ## Parses `text` as HTML and returns its inner text.
  ##
  ## For every `<a>` element found below the parsed root whose `href`
  ## contains "http", the first child node's text is replaced by the `href`
  ## value, so the link target shows up in the output instead of the
  ## original label.
  let html = parseHtml(text)
  for el in html.findAll("a"):
    let link = el.attr("href")
    # `text=` is only valid on text-like nodes, and `el[0]` needs at least
    # one child; anchors that are empty or start with a nested element are
    # left untouched instead of raising.
    if "http" in link and el.len > 0 and
        el[0].kind in {xnText, xnComment, xnCData, xnEntity}:
      el[0].text = link
  html.innerText()

proc shortLink*(text: string; length=28): string =
  ## Removes every match of `wwwRegex` (scheme plus optional `www` prefix)
  ## from `text`. If more than `length` bytes remain, only the first
  ## `length` bytes are kept and "…" is appended.
  let bare = replace(text, wwwRegex, "")
  result =
    if bare.len > length: bare[0 ..< length] & "…"
    else: bare

proc replaceUrl*(url: string; prefs: Prefs; absolute=""): string =
  ## Rewrites YouTube, Instagram and Twitter hosts inside `url` with the
  ## replacement hosts configured in `prefs`; an empty preference disables
  ## the corresponding rewrite. When `absolute` is non-empty, every
  ## `href="/` is expanded to `href="https://<absolute>/`.
  let
    youtube = prefs.replaceYouTube
    instagram = prefs.replaceInstagram
    twitter = prefs.replaceTwitter

  result = url

  if youtube.len > 0:
    result = replace(result, ytRegex, youtube)
    # Once the replacement host is present, drop the "/c/" path segment.
    if youtube in result:
      result = replace(result, "/c/", "/")

  if instagram.len > 0:
    result = replace(result, igRegex, instagram)

  if twitter.len > 0:
    # Order matters: the t.co and cards literals are rewritten before the
    # generic twitter.com pattern, which would otherwise match inside the
    # cards host.
    result = result
      .replace(tco, "https://" & twitter & "/t.co")
      .replace(cards, twitter & "/cards")
      .replace(twRegex, twitter)

  if absolute.len > 0:
    result = replace(result, "href=\"/", "href=\"https://" & absolute & "/")

proc proxifyVideo*(manifest: string; proxy: bool): string =
  ## Prefixes every `manifestRegex` match in `manifest` with
  ## `https://video.twimg.com`. When `proxy` is true the resulting URL is
  ## additionally passed through `getVidUrl`.
  proc absolutize(m: RegexMatch; s: string): string =
    # The first capture of group 0 is the matched text being rewritten.
    let url = "https://video.twimg.com" & s[m.group(0)[0]]
    if proxy: getVidUrl(url) else: url

  manifest.replace(manifestRegex, absolutize)

proc getUserpic*(userpic: string; style=""): string =
  ## Drops a trailing size suffix (`_normal`, `_bigger`, `_mini`, `_200x200`
  ## or `_400x400`) that directly precedes the file extension, then inserts
  ## `style` right before the extension.
  result = userpic.replace(userpicRegex, "$2")
  result = result.replace(extRegex, style & "$1")

proc getUserpic*(profile: Profile; style=""): string =
  ## Overload that applies `getUserpic` to `profile.userpic`.
  result = profile.userpic.getUserpic(style)

proc getVideoEmbed*(cfg: Config; id: int64): string =
  ## Builds the `https://<hostname>/i/videos/<id>` URL from `cfg.hostname`
  ## and `id`.
  result = fmt"https://{cfg.hostname}/i/videos/{id}"

proc pageTitle*(profile: Profile): string =
  ## Title of the form `<fullname> (@<username>)`.
  result = fmt"{profile.fullname} (@{profile.username})"

proc pageTitle*(tweet: Tweet): string =
  ## Title of the form `<profile title>: "<tweet text without HTML>"`.
  let
    author = pageTitle(tweet.profile)
    body = stripHtml(tweet.text)
  result = author & ": \"" & body & "\""

proc pageDesc*(profile: Profile): string =
  ## Returns the profile bio with HTML stripped, or a generic
  ## "The latest tweets from <fullname>" line when the bio is empty.
  result =
    if profile.bio.len == 0: "The latest tweets from " & profile.fullname
    else: stripHtml(profile.bio)

proc getJoinDate*(profile: Profile): string =
  ## Formats `profile.joinDate` as "Joined <full month name> <year>".
  result = format(profile.joinDate, "'Joined' MMMM YYYY")

proc getJoinDateFull*(profile: Profile): string =
  ## Formats `profile.joinDate` with a 12-hour clock, AM/PM marker, day,
  ## abbreviated month and year, e.g. "3:07 PM - 5 Aug 2019".
  result = format(profile.joinDate, "h:mm tt - d MMM YYYY")

proc getTime*(tweet: Tweet): string =
  ## Formats `tweet.time` as day/month/year followed by a 24-hour
  ## time, e.g. "5/8/2019, 15:07:09".
  result = format(tweet.time, "d/M/yyyy', 'HH:mm:ss")

proc getRfc822Time*(tweet: Tweet): string =
  ## Formats `tweet.time` in RFC 822 style, e.g.
  ## "Sun, 15 Sep 2019 09:14:03 GMT".
  ##
  ## The pattern hard-codes the "GMT" zone name, so the value is converted
  ## to UTC before formatting; otherwise the printed clock time would follow
  ## whichever zone the value happens to be rendered in while still being
  ## labelled GMT.
  tweet.time.utc.format("ddd', 'd MMM yyyy HH:mm:ss 'GMT'")

proc getTweetTime*(tweet: Tweet): string =
  ## Formats `tweet.time` as 12-hour time, a middle dot, then the date,
  ## e.g. "3:07 PM · Aug 5, 2019".
  result = format(tweet.time, "h:mm tt' · 'MMM d', 'YYYY")

proc getLink*(tweet: Tweet | Quote; focus=true): string =
  ## Builds the relative `/<username>/status/<id>` link for `tweet`, with an
  ## `#m` fragment appended when `focus` is true. Returns an empty string
  ## when `tweet.id` is 0.
  if tweet.id != 0:
    result = fmt"/{tweet.profile.username}/status/{tweet.id}"
    if focus:
      result.add "#m"

proc getTombstone*(text: string): string =
  ## Removes the "Learn more" trailer (with any newlines/spaces in front of
  ## it) from `text`, normalizes it via `stripText`, then trims spaces and
  ## newlines from both ends.
  let cleaned = stripText(replace(text, tombstoneRegex, ""))
  result = strip(cleaned, chars = {' ', '\n'})

proc getTwitterLink*(path: string; params: Table[string, string]): string =
  ## Builds the twitter.com URL for `path` and `params`.
  ##
  ## Non-search paths get `filterParams(params)` as the query string. Search
  ## paths get a query built from `initQuery`/`genQueryParam`, and every
  ## `/<name>` occurrence (taken from the "name" param) is removed from the
  ## resulting URL.
  let
    username = params.getOrDefault("name")
    query = initQuery(params, username)
    target = parseUri("https://twitter.com") / path

  if "/search" notin path:
    result = $(target ? filterParams(params))
  else:
    let searchArgs = {
      "f": $query.kind,
      "q": genQueryParam(query),
      "src": "typd",
      "max_position": params.getOrDefault("max_position", "0")
    }

    result = $(target ? searchArgs)
    if username.len > 0:
      result = result.replace("/" & username, "")

proc getLocation*(u: Profile | Tweet): (string, string) =
  ## Splits `u.location` into a display text and a search URL.
  ##
  ## A location containing "://" is returned unchanged with an empty URL.
  ## Otherwise the location is split on ':'; the first piece is the text and,
  ## if a second piece exists, it is appended to `/search?q=place:`.
  if u.location.contains("://"):
    result = (u.location, "")
  else:
    let parts = split(u.location, ":")
    result[0] = parts[0]
    if parts.len > 1:
      result[1] = "/search?q=place:" & parts[1]

proc getSuspended*(username: string): string =
  ## Message shown for a suspended account, with `username` in double quotes.
  result = "User \"" & username & "\" has been suspended"
Generalize YouTube regex 2019-12-06 04:37:38 +01:00			`import strutils, strformat, times, uri, tables`
			`import xmltree, htmlparser`
Initial commit 2019-06-20 16:16:20 +02:00			`import regex`

Show Twitter link on search pages 2019-10-08 15:07:10 +02:00			`import types, utils, query`
Initial commit 2019-06-20 16:16:20 +02:00
Add simple emoji support 2019-06-25 02:38:18 +02:00			from unicode import Rune, `$`

Initial commit 2019-06-20 16:16:20 +02:00			`const`
Fix incorrect regex Fixes #109 2020-01-19 08:49:20 +01:00			`ytRegex = re"([A-z.]+\.)?youtu(be\.com\|\.be)"`
			`twRegex = re"(www\.\|mobile\.)?twitter\.com"`
Add Instagram replacement preference Fixes #133 2020-03-29 09:03:06 +02:00			`igRegex = re"(www\.)?instagram.com"`
Fix card links 2020-03-09 00:33:52 +01:00			`cards = "cards.twitter.com/cards"`
Add t.co and /cards link resolvers For t.co links: https://t.co/.. -> nitter.net/t.co/.. For card links: https://cards.twitter.com/cards/.. -> nitter.net/cards/... 2019-12-30 11:41:09 +01:00			`tco = "https://t.co"`
Add simple emoji support 2019-06-25 02:38:18 +02:00			`nbsp = $Rune(0x000A0)`

Turn regex patterns into consts 2020-01-22 13:04:35 +01:00			`wwwRegex = re"https?://(www[0-9]?\.)?"`
			`manifestRegex = re"(.+(.ts\|.m3u8\|.vmap))"`
			`userpicRegex = re"_(normal\|bigger\|mini\|200x200\|400x400)(\.[A-z]+)$"`
			`extRegex = re"(\.[A-z]+)$"`
			`tombstoneRegex = re"\n* *Learn more"`

Add simple emoji support 2019-06-25 02:38:18 +02:00			`proc stripText*(text: string): string =`
			`text.replace(nbsp, " ").strip()`
Initial commit 2019-06-20 16:16:20 +02:00
Minor cleanup, fix empty lines before card links 2019-10-10 17:47:02 +02:00			`proc stripHtml*(text: string): string =`
Unshortify links when stripping html 2019-10-11 19:20:40 +02:00			`var html = parseHtml(text)`
			`for el in html.findAll("a"):`
			`let link = el.attr("href")`
			`if "http" in link:`
			`el[0].text = link`
Minor cleanup, fix empty lines before card links 2019-10-10 17:47:02 +02:00			`html.innerText()`

Initial commit 2019-06-20 16:16:20 +02:00			`proc shortLink*(text: string; length=28): string =`
Turn regex patterns into consts 2020-01-22 13:04:35 +01:00			`result = text.replace(wwwRegex, "")`
Initial commit 2019-06-20 16:16:20 +02:00			`if result.len > length:`
			`result = result[0 ..< length] & "…"`

Refactor hostname to be a runtime option Add a `hostname` field under Server in your conf file, see the updated nitter.conf in the repo for an example. The compile-time option (-d:hostname) is no longer used. 2019-10-21 05:19:00 +02:00			`proc replaceUrl*(url: string; prefs: Prefs; absolute=""): string =`
Fix empty link replacement 2019-08-15 18:45:56 +02:00			`result = url`
Add Invidious/Nitter link replacement preferences 2019-08-15 15:51:20 +02:00			`if prefs.replaceYouTube.len > 0:`
Include 'www.' in twitter/youtube link replacement 2019-08-15 19:19:21 +02:00			`result = result.replace(ytRegex, prefs.replaceYouTube)`
Fix converted youtube channel links 2020-03-09 00:47:00 +01:00			`if prefs.replaceYouTube in result:`
			`result = result.replace("/c/", "/")`
Add Instagram replacement preference Fixes #133 2020-03-29 09:03:06 +02:00			`if prefs.replaceInstagram.len > 0:`
			`result = result.replace(igRegex, prefs.replaceInstagram)`
Add Invidious/Nitter link replacement preferences 2019-08-15 15:51:20 +02:00			`if prefs.replaceTwitter.len > 0:`
Add t.co and /cards link resolvers For t.co links: https://t.co/.. -> nitter.net/t.co/.. For card links: https://cards.twitter.com/cards/.. -> nitter.net/cards/... 2019-12-30 11:41:09 +01:00			`result = result.replace(tco, "https://" & prefs.replaceTwitter & "/t.co")`
Fix card links 2020-03-09 00:33:52 +01:00			`result = result.replace(cards, prefs.replaceTwitter & "/cards")`
			`result = result.replace(twRegex, prefs.replaceTwitter)`
Refactor hostname to be a runtime option Add a `hostname` field under Server in your conf file, see the updated nitter.conf in the repo for an example. The compile-time option (-d:hostname) is no longer used. 2019-10-21 05:19:00 +02:00			`if absolute.len > 0:`
			`result = result.replace("href=\"/", "href=\"https://" & absolute & "/")`
Initial commit 2019-06-20 16:16:20 +02:00
Add video proxy support 2019-08-19 20:53:47 +02:00			`proc proxifyVideo*(manifest: string; proxy: bool): string =`
			`proc cb(m: RegexMatch; s: string): string =`
			`result = "https://video.twimg.com" & s[m.group(0)[0]]`
Restrict image/gif media host instead of hashing 2019-09-13 12:27:04 +02:00			`if proxy: result = getVidUrl(result)`
Turn regex patterns into consts 2020-01-22 13:04:35 +01:00			`result = manifest.replace(manifestRegex, cb)`
Add video proxy support 2019-08-19 20:53:47 +02:00
Initial commit 2019-06-20 16:16:20 +02:00			`proc getUserpic*(userpic: string; style=""): string =`
Turn regex patterns into consts 2020-01-22 13:04:35 +01:00			`let pic = userpic.replace(userpicRegex, "$2")`
			`pic.replace(extRegex, style & "$1")`
Initial commit 2019-06-20 16:16:20 +02:00
			`proc getUserpic*(profile: Profile; style=""): string =`
			`getUserPic(profile.userpic, style)`

Change ID types to int64 2019-12-10 00:39:12 +01:00			`proc getVideoEmbed*(cfg: Config; id: int64): string =`
Support video embeds Fixes #66 2019-12-06 15:15:56 +01:00			`&"https://{cfg.hostname}/i/videos/{id}"`
Implement link previews 2019-08-07 22:02:19 +02:00
Add dynamic page title 2019-06-24 22:40:48 +02:00			`proc pageTitle*(profile: Profile): string =`
Add server config file 2019-07-31 02:15:43 +02:00			`&"{profile.fullname} (@{profile.username})"`
Ensure correct text formatting 2019-06-25 04:52:38 +02:00
Add tweet page titles Fixes #124 2020-03-29 09:15:05 +02:00			`proc pageTitle*(tweet: Tweet): string =`
			`&"{pageTitle(tweet.profile)}: \"{stripHtml(tweet.text)}\""`

Implement link previews 2019-08-07 22:02:19 +02:00			`proc pageDesc*(profile: Profile): string =`
Display profile bio in preview 2019-10-11 18:43:47 +02:00			`if profile.bio.len > 0:`
			`stripHtml(profile.bio)`
			`else:`
			`"The latest tweets from " & profile.fullname`
Implement link previews 2019-08-07 22:02:19 +02:00
Revamp profile api to display more metadata 2019-08-11 21:26:55 +02:00			`proc getJoinDate*(profile: Profile): string =`
			`profile.joinDate.format("'Joined' MMMM YYYY")`

			`proc getJoinDateFull*(profile: Profile): string =`
			`profile.joinDate.format("h:mm tt - d MMM YYYY")`

Ensure correct text formatting 2019-06-25 04:52:38 +02:00			`proc getTime*(tweet: Tweet): string =`
Improve RSS validity 2019-09-15 11:14:03 +02:00			`tweet.time.format("d/M/yyyy', 'HH:mm:ss")`

			`proc getRfc822Time*(tweet: Tweet): string =`
			`tweet.time.format("ddd', 'd MMM yyyy HH:mm:ss 'GMT'")`
Generate tweet links 2019-07-01 23:14:36 +02:00
Show published date under main tweets Fixes #59 2019-10-08 13:28:57 +02:00			`proc getTweetTime*(tweet: Tweet): string =`
			`tweet.time.format("h:mm tt' · 'MMM d', 'YYYY")`

Focus main tweet in threads 2019-10-22 09:17:58 +02:00			`proc getLink*(tweet: Tweet \| Quote; focus=true): string =`
Use int for tweet ids for correct thread sorting 2019-10-10 18:22:14 +02:00			`if tweet.id == 0: return`
Focus main tweet in threads 2019-10-22 09:17:58 +02:00			`result = &"/{tweet.profile.username}/status/{tweet.id}"`
			`if focus: result &= "#m"`
Show reasons for tweets being withheld Fixes #33 2019-09-08 14:34:26 +02:00
			`proc getTombstone*(text: string): string =`
Turn regex patterns into consts 2020-01-22 13:04:35 +01:00			`text.replace(tombstoneRegex, "").stripText().strip(chars={' ', '\n'})`
Show Twitter link on search pages 2019-10-08 15:07:10 +02:00
			`proc getTwitterLink*(path: string; params: Table[string, string]): string =`
			`let`
			`twitter = parseUri("https://twitter.com")`
			`username = params.getOrDefault("name")`
			`query = initQuery(params, username)`

			`if "/search" notin path:`
Use "max_position" instead of "after" for compat 2019-10-08 15:15:47 +02:00			`return $(twitter / path ? filterParams(params))`
Show Twitter link on search pages 2019-10-08 15:07:10 +02:00
			`let p = {`
			`"f": $query.kind,`
			`"q": genQueryParam(query),`
			`"src": "typd",`
Use "max_position" instead of "after" for compat 2019-10-08 15:15:47 +02:00			`"max_position": params.getOrDefault("max_position", "0")`
Show Twitter link on search pages 2019-10-08 15:07:10 +02:00			`}`

			`result = $(parseUri("https://twitter.com") / path ? p)`
			`if username.len > 0:`
			`result = result.replace("/" & username, "")`
Support tweet locations 2019-12-21 05:44:58 +01:00
			`proc getLocation*(u: Profile \| Tweet): (string, string) =`
Fix displaying urls in location fields 2020-03-09 01:03:24 +01:00			`if "://" in u.location: return (u.location, "")`
Support tweet locations 2019-12-21 05:44:58 +01:00			`let loc = u.location.split(":")`
			`let url = if loc.len > 1: "/search?q=place:" & loc[1] else: ""`
			`(loc[0], url)`
Detect suspended accounts 2020-04-14 23:56:31 +02:00
			`proc getSuspended*(username: string): string =`
			`&"User \"{username}\" has been suspended"`