nitter/src/formatters.nim

196 lines
6.3 KiB
Nim
Raw Normal View History

2021-12-27 02:37:38 +01:00
# SPDX-License-Identifier: AGPL-3.0-only
import strutils, strformat, times, uri, tables, xmltree, htmlparser, htmlgen
import std/[enumerate, re]
2019-10-08 15:07:10 +02:00
import types, utils, query
2019-06-20 16:16:20 +02:00
const
  # Host/path fragment of Twitter card links that gets swapped for the
  # configured replacement host.
  cards = "cards.twitter.com/cards"

  # Base URL of Twitter's link shortener.
  tco = "https://t.co"

  # Parsed base URI used when building links back to twitter.com.
  twitter = parseUri("https://twitter.com")
let
  # Regular expressions shared by the formatting helpers in this module.
  #
  # Character classes use `[A-Za-z]` rather than `[A-z]`: the latter also
  # spans the ASCII punctuation between 'Z' and 'a' ("[", "\", "]", "^",
  # "_" and "`"). Literal dots in host names and file extensions are
  # escaped so that they do not act as "any character" wildcards.

  # twitter.com host (optionally www./mobile.) that follows "https://" or
  # whitespace.
  twRegex = re"(?<=(?<!\S)https:\/\/|(?<=\s))(www\.|mobile\.)?twitter\.com"
  # Anchor tag whose href and visible text both point at twitter.com;
  # $1 is the href path, $2 the remainder of the visible text.
  twLinkRegex = re"""<a href="https:\/\/twitter\.com([^"]+)">twitter\.com(\S+)</a>"""

  # youtube.com / youtu.be, with an optional subdomain prefix.
  ytRegex = re"([A-Za-z.]+\.)?youtu(be\.com|\.be)"
  igRegex = re"(www\.)?instagram\.com"

  # reddit.com (optionally with a known subdomain), not preceded by '.' or
  # 'b'.
  rdRegex = re"(?<![.b])((www|np|new|amp|old)\.)?reddit\.com"
  rdShortRegex = re"(?<![.b])redd\.it\/"
  # Videos cannot be supported uniformly between Teddit and Libreddit,
  # so v.redd.it links will not be replaced.
  # Images aren't supported due to errors from Teddit when the image
  # wasn't first displayed via a post on the Teddit instance.

  # Leading scheme plus optional "www"/"wwwN" host prefix.
  wwwRegex = re"https?://(www[0-9]?\.)?"

  # Captures the value of a url="..." attribute that ends in ".m3u8".
  m3u8Regex = re"""url="(.+\.m3u8)""""

  # Size suffix of a profile picture file name ($1) and its extension ($2).
  userPicRegex = re"_(normal|bigger|mini|200x200|400x400)(\.[A-Za-z]+)$"

  # Trailing file extension, including the dot.
  extRegex = re"(\.[A-Za-z]+)$"

  # Any code point outside the ranges listed in the negated class; used to
  # strip characters that are not legal in XML output.
  illegalXmlRegex = re"(*UTF8)[^\x09\x0A\x0D\x20-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]"
2021-01-08 02:25:43 +01:00
proc getUrlPrefix*(cfg: Config): string =
  ## Builds the scheme + host prefix for this instance from `cfg`.
  ##
  ## Uses the `https` constant (defined outside this block) when
  ## `cfg.useHttps` is set, and the literal "http://" otherwise, followed
  ## by `cfg.hostname`.
  let scheme =
    if cfg.useHttps: https
    else: "http://"
  scheme & cfg.hostname
2022-01-14 18:01:47 +01:00
proc shortLink*(text: string; length=28): string =
  ## Returns `text` with its leading scheme and optional "www." prefix
  ## removed (see `wwwRegex`), truncated to at most `length` bytes.
  ##
  ## When truncation happens an ellipsis is appended so the reader can tell
  ## that the link was cut. The previous code appended an empty string,
  ## which left truncated links without any marker.
  result = text.replace(wwwRegex, "")
  if result.len > length:
    result = result[0 ..< length] & "…"
proc stripHtml*(text: string; shorten=false): string =
  ## Parses `text` as HTML and returns its inner text.
  ##
  ## Before the text is extracted, every `<a>` element whose `href`
  ## contains "http" and which has at least one child gets the text of its
  ## first child replaced by the link target (passed through `shortLink`
  ## when `shorten` is true).
  let doc = parseHtml(text)
  for anchor in doc.findAll("a"):
    let href = anchor.attr("href")
    # Skip anchors that are not external links or that have no child node
    # to overwrite.
    if "http" notin href or anchor.len == 0:
      continue
    if shorten:
      anchor[0].text = href.shortLink
    else:
      anchor[0].text = href
  result = doc.innerText()
proc sanitizeXml*(text: string): string =
  ## Returns `text` with every match of `illegalXmlRegex` removed.
  result = replace(text, illegalXmlRegex, "")
2021-12-27 02:27:49 +01:00
proc replaceUrls*(body: string; prefs: Prefs; absolute=""): string =
  ## Rewrites links in `body` according to the replacement hosts in `prefs`.
  ##
  ## Each service (YouTube, Twitter, Reddit, Instagram) is only touched
  ## when its replacement host is non-empty and a matching marker string
  ## is present. When `absolute` is non-empty, root-relative `href`
  ## attributes are prefixed with it. The order of the replacements is
  ## significant and must be kept as is.
  result = body

  let
    ytHost = prefs.replaceYouTube
    twHost = prefs.replaceTwitter
    rdHost = prefs.replaceReddit
    igHost = prefs.replaceInstagram

  if ytHost.len > 0 and "youtu" in result:
    result = result.replace(ytRegex, ytHost)
    # Once the replacement host is present, drop the "/c/" path segment.
    if ytHost in result:
      result = result.replace("/c/", "/")

  # NOTE: this guard inspects the untouched `body`, not `result`.
  if twHost.len > 0 and ("twitter.com" in body or tco in body):
    result = result.replace(tco, https & twHost & "/t.co")
    result = result.replace(cards, twHost & "/cards")
    result = result.replace(twRegex, twHost)
    # Rebuild anchors whose href and visible text both pointed at twitter.com.
    let anchor = a(twHost & "$2", href = https & twHost & "$1")
    result = result.replacef(twLinkRegex, anchor)

  if rdHost.len > 0 and ("reddit.com" in result or "redd.it" in result):
    result = result.replace(rdShortRegex, rdHost & "/comments/")
    result = result.replace(rdRegex, rdHost)
    if rdHost in result and "/gallery/" in result:
      result = result.replace("/gallery/", "/comments/")

  if igHost.len > 0 and "instagram.com" in result:
    result = result.replace(igRegex, igHost)

  if absolute.len > 0 and "href" in result:
    result = result.replace("href=\"/", "href=\"" & absolute & "/")
2019-06-20 16:16:20 +02:00
2020-06-09 15:04:38 +02:00
proc getM3u8Url*(content: string): string =
  ## Returns the first capture of `m3u8Regex` found in `content`, or an
  ## empty string when the pattern does not match.
  var captures: array[1, string]
  if re.find(content, m3u8Regex, captures) == -1:
    return ""
  captures[0]
2020-06-09 15:04:38 +02:00
2019-08-19 20:53:47 +02:00
proc proxifyVideo*(manifest: string; proxy: bool): string =
  ## Rewrites root-relative URLs found in `manifest` to absolute
  ## "https://video.twimg.com" URLs.
  ##
  ## Each line is inspected; for lines starting with "#EXT-X-MAP:URI" the
  ## URL is taken from index 16 up to (but excluding) the last character.
  ## When `proxy` is true the absolute URL is additionally passed through
  ## `getVidUrl`.
  const videoHost = "https://video.twimg.com"
  var swaps: seq[(string, string)]

  for entry in splitLines(manifest):
    var segment = entry
    if entry.startsWith("#EXT-X-MAP:URI"):
      segment = entry[16 .. ^2]

    # Only root-relative paths need rewriting.
    if not segment.startsWith('/'):
      continue

    let absoluteUrl = videoHost & segment
    if proxy:
      swaps.add((segment, absoluteUrl.getVidUrl))
    else:
      swaps.add((segment, absoluteUrl))

  result = multiReplace(manifest, swaps)
2019-08-19 20:53:47 +02:00
2022-01-06 03:57:14 +01:00
proc getUserPic*(userPic: string; style=""): string =
  ## Returns `userPic` with any size suffix matched by `userPicRegex`
  ## removed and `style` inserted directly before the file extension.
  let unsized = userPic.replacef(userPicRegex, "$2")
  result = unsized.replacef(extRegex, style & "$1")
2019-06-20 16:16:20 +02:00
proc getUserPic*(user: User; style=""): string =
  ## Convenience overload: applies `getUserPic` to `user.userPic`.
  result = user.userPic.getUserPic(style)
2019-06-20 16:16:20 +02:00
2019-12-10 00:39:12 +01:00
proc getVideoEmbed*(cfg: Config; id: int64): string =
  ## Returns the URL "<prefix>/i/videos/<id>", where the prefix comes from
  ## `getUrlPrefix(cfg)`.
  result = getUrlPrefix(cfg) & "/i/videos/" & $id
2019-08-07 22:02:19 +02:00
proc pageTitle*(user: User): string =
  ## Page title for a profile: "<fullname> (@<username>)".
  result = user.fullname & " (@" & user.username & ")"
2019-06-25 04:52:38 +02:00
2020-03-29 09:15:05 +02:00
proc pageTitle*(tweet: Tweet): string =
  ## Page title for a tweet: the author's page title, a colon, and the
  ## HTML-stripped tweet text wrapped in double quotes.
  let
    author = pageTitle(tweet.user)
    content = stripHtml(tweet.text)
  result = author & ": \"" & content & "\""
2020-03-29 09:15:05 +02:00
proc pageDesc*(user: User): string =
  ## Page description for a profile: the HTML-stripped bio, or a generic
  ## sentence mentioning the user's full name when the bio is empty.
  if user.bio.len == 0:
    return "The latest tweets from " & user.fullname
  stripHtml(user.bio)
2019-08-07 22:02:19 +02:00
proc getJoinDate*(user: User): string =
  ## Formats `user.joinDate` as "Joined <month name> <year>".
  const layout = "'Joined' MMMM YYYY"
  result = format(user.joinDate, layout)
proc getJoinDateFull*(user: User): string =
  ## Formats `user.joinDate` with time of day, day, abbreviated month and
  ## year.
  const layout = "h:mm tt - d MMM YYYY"
  result = format(user.joinDate, layout)
2019-06-25 04:52:38 +02:00
proc getTime*(tweet: Tweet): string =
  ## Formats `tweet.time` as a long, human-readable timestamp with a
  ## literal " UTC" suffix.
  const layout = "MMM d', 'YYYY' · 'h:mm tt' UTC'"
  result = format(tweet.time, layout)
2019-09-15 11:14:03 +02:00
proc getRfc822Time*(tweet: Tweet): string =
  ## Formats `tweet.time` in an RFC 822 style layout with a literal "GMT"
  ## zone suffix.
  # NOTE(review): the zone is hard-coded, so this presumably expects
  # `tweet.time` to already be in UTC - confirm against the parser.
  const layout = "ddd', 'dd MMM yyyy HH:mm:ss 'GMT'"
  result = format(tweet.time, layout)
2019-07-01 23:14:36 +02:00
2020-06-01 02:25:39 +02:00
proc getShortTime*(tweet: Tweet): string =
  ## Compact timestamp for `tweet.time`, relative to the current time.
  ##
  ## * different calendar year: "d MMM yyyy"
  ## * at least one day ago:    "MMM d"
  ## * otherwise the elapsed whole hours, minutes or seconds, suffixed
  ##   with "h", "m" or "s"
  ## * "now" when no more than one whole second has elapsed
  let
    current = now()
    posted = tweet.time
    elapsed = current - posted

  if current.year != posted.year: posted.format("d MMM yyyy")
  elif elapsed.inDays >= 1: posted.format("MMM d")
  elif elapsed.inHours >= 1: $elapsed.inHours & "h"
  elif elapsed.inMinutes >= 1: $elapsed.inMinutes & "m"
  elif elapsed.inSeconds > 1: $elapsed.inSeconds & "s"
  else: "now"
proc getLink*(tweet: Tweet; focus=true): string =
  ## Returns the local path "/<username>/status/<id>" for `tweet`, with
  ## "#m" appended when `focus` is true.
  ##
  ## Yields an empty string when `tweet.id` is 0. A missing username is
  ## replaced by "i".
  if tweet.id == 0:
    return

  let handle =
    if tweet.user.username.len > 0: tweet.user.username
    else: "i"

  result = "/" & handle & "/status/" & $tweet.id
  if focus:
    result.add "#m"
2019-10-08 15:07:10 +02:00
proc getTwitterLink*(path: string; params: Table[string, string]): string =
  ## Builds the twitter.com URL that corresponds to a local `path` and its
  ## request `params`.
  ##
  ## A comma-separated "name" parameter is treated as a multi-user search.
  ## Non-search paths with fewer than two `fromUser` entries map directly
  ## onto twitter.com; everything else becomes a search URL with "f", "q"
  ## and "src" query arguments. A non-empty "name" value is finally
  ## removed from the resulting URL.
  let name = params.getOrDefault("name")
  var
    search = initQuery(params, name)
    target = path

  if "," in name:
    search.fromUser = name.split(",")
    target = "/search"

  if "/search" notin target and search.fromUser.len < 2:
    return $(twitter / target)

  let mode =
    if search.kind == users: "user"
    else: "live"
  let queryArgs = {
    "f": mode,
    "q": genQueryParam(search),
    "src": "typed_query"
  }

  result = $((twitter / target) ? queryArgs)
  if name.len > 0:
    result = result.replace("/" & name, "")
2019-12-21 05:44:58 +01:00
proc getLocation*(u: User | Tweet): (string, string) =
  ## Splits `u.location` into a display text and an optional search URL.
  ##
  ## Locations containing "://" are returned unchanged with an empty URL.
  ## Otherwise the value is split on ':'; the first part is the text and,
  ## when a second part exists, it is used to build a "/search?q=place:"
  ## link.
  let raw = u.location
  if "://" in raw:
    result = (raw, "")
  else:
    let parts = raw.split(":")
    result[0] = parts[0]
    if parts.len > 1:
      result[1] = "/search?q=place:" & parts[1]
2020-04-14 23:56:31 +02:00
proc getSuspended*(username: string): string =
  ## Message shown for a suspended account; `username` is wrapped in
  ## double quotes.
  result = "User \"" & username & "\" has been suspended"
2022-01-10 16:18:10 +01:00
proc titleize*(str: string): string =
  ## Returns `str` with the first letter of every word upper-cased.
  ##
  ## A word starts at index 0 or directly after a space or an opening
  ## parenthesis. Only ASCII lowercase letters are changed.
  const wordBreaks = {' ', '('}
  result = str
  var atWordStart = true
  for idx in 0 ..< str.len:
    let ch = str[idx]
    if atWordStart and ch in {'a'..'z'}:
      result[idx] = toUpperAscii(ch)
    # The next character starts a word only if this one is a delimiter.
    atWordStart = ch in wordBreaks