nitter/src/parserutils.nim

217 lines
6.4 KiB
Nim
Raw Normal View History

import strutils, times, macros, htmlgen, unicode, options
import regex, packedjson
2020-06-01 02:16:24 +02:00
import types, utils, formatters
const
  # Matches an @handle. Group 1 is the character before the '@' (or the
  # start of the text) and must not be a letter, digit or one of -_./? so
  # that handles embedded in words, paths or URLs are skipped. Group 2 is
  # the handle itself (1 to 15 letters, digits or underscores).
  # The ranges are spelled A-Za-z on purpose: `A-z` would also cover the
  # ASCII punctuation that sits between 'Z' and 'a' ([ \ ] ^ _ `).
  unRegex = re"(^|[^A-Za-z0-9-_./?])@([A-Za-z0-9_]{1,15})"
  # Re-emits group 1 and wraps the handle in a link to "/<handle>".
  unReplace = "$1<a href=\"/$2\">@$2</a>"

  # Matches a '#' or '$' tag. Group 1 is the preceding character (or start
  # of text), group 2 the sigil and group 3 the tag text.
  htRegex = re"(^|[^\w-_./?])([#$])([\w_]+)"
  # Re-emits group 1 and links the tag to a search for "#<tag>".
  htReplace = "$1<a href=\"/search?q=%23$3\">$2$3</a>"
2020-06-01 02:16:24 +02:00
template isNull*(js: JsonNode): bool =
  ## True when `js` is a JSON null node.
  js.kind == JNull

template notNull*(js: JsonNode): bool =
  ## True when `js` is anything other than a JSON null node.
  not isNull(js)
2020-06-01 02:16:24 +02:00
template `?`*(js: JsonNode): untyped =
  ## Evaluates `js` once. When the node is JSON null this executes a bare
  ## `return` in the routine that uses the template; otherwise the node
  ## itself is the value of the expression.
  let node = js
  if node.kind == JNull: return
  else: node
template `with`*(ident, value, body): untyped =
  ## Binds `value` to a new immutable `ident` (visible inside `body`) in its
  ## own block and runs `body` only when the bound value is not `nil`.
  block:
    let ident {.inject.} = value
    if not (ident == nil):
      body
2020-06-01 02:16:24 +02:00
template `with`*(ident; value: JsonNode; body): untyped =
  ## Binds the JSON node `value` to a new immutable `ident` (visible inside
  ## `body`) in its own block and runs `body` only when the node is not
  ## JSON null.
  block:
    let ident {.inject.} = value
    # test the bound copy so the `value` expression is evaluated only once
    if notNull(ident):
      body
2020-06-01 02:16:24 +02:00
2020-06-01 13:47:43 +02:00
template getCursor*(js: JsonNode): string =
  ## Reads the string stored under content -> operation -> cursor -> value.
  getStr(js{"content", "operation", "cursor", "value"})
2020-06-01 21:53:21 +02:00
template getError*(js: JsonNode): Error =
  ## Yields the `Error` built from the integer "code" field of the first
  ## element when `js` is a non-empty array, and `null` otherwise.
  if js.kind == JArray and js.len != 0: Error(js[0]{"code"}.getInt)
  else: null
2020-06-01 13:47:43 +02:00
template parseTime(time: string; f: static string; flen: int): Time =
  ## Parses `time` with the `times` format string `f` and yields the result
  ## as a `Time`.
  ##
  ## When `time` is not exactly `flen` characters long, a bare `return` is
  ## executed in the routine that uses this template, so that routine
  ## returns its default result.
  let input = time  # evaluate the argument expression only once
  if input.len != flen: return
  # Both formats used with this template carry a literal UTC marker ('Z' or
  # '+0000'), so the fields must be interpreted as UTC; `parse` would
  # otherwise assume the machine's local timezone.
  parse(input, f, utc()).toTime
2020-06-01 02:16:24 +02:00
proc getDateTime*(js: JsonNode): Time =
  ## Parses the string value of `js`, expected as 20 characters in the form
  ## `yyyy-MM-ddTHH:mm:ssZ`, into a `Time`. A string of any other length
  ## gives the default `Time`.
  const isoFormat = "yyyy-MM-dd\'T\'HH:mm:ss\'Z\'"
  let raw = js.getStr
  parseTime(raw, isoFormat, 20)
proc getTime*(js: JsonNode): Time =
  ## Parses the string value of `js`, expected as 30 characters in the form
  ## `ddd MMM dd hh:mm:ss +0000 yyyy`, into a `Time`. A string of any other
  ## length gives the default `Time`.
  const longFormat = "ddd MMM dd hh:mm:ss \'+0000\' yyyy"
  let raw = js.getStr
  parseTime(raw, longFormat, 30)
2020-06-01 13:47:43 +02:00
proc getId*(id: string): string {.inline.} =
  ## Returns everything after the last '-' in `id`, or `id` unchanged when
  ## it contains no '-'.
  let dash = id.rfind("-")
  if dash >= 0:
    result = id.substr(dash + 1)
  else:
    result = id
2020-06-01 13:47:43 +02:00
proc getId*(js: JsonNode): int64 {.inline.} =
  ## Reads a 64-bit id from `js`: a string node is parsed as an integer, an
  ## integer node is returned directly, and any other kind gives 0.
  if js.kind == JString:
    result = parseBiggestInt(js.getStr("0"))
  elif js.kind == JInt:
    result = js.getBiggestInt()
  # every other node kind leaves `result` at its default of 0
2020-11-07 22:48:49 +01:00
proc getEntryId*(js: JsonNode): string {.inline.} =
  ## Derives an id from the "entryId" field of `js`.
  ##
  ## * empty or missing "entryId": returns "".
  ## * contains "tweet": returns the part of the entry id after its last '-'.
  ## * contains "tombstone": returns the string found under
  ##   content -> item -> content -> tombstone -> tweet -> id.
  ## * anything else: prints the entry id and returns "".
  let entryId = js{"entryId"}.getStr
  if entryId.len == 0:
    return ""

  if entryId.contains("tweet"):
    result = entryId.getId
  elif entryId.contains("tombstone"):
    result =
      js{"content", "item", "content", "tombstone", "tweet", "id"}.getStr
  else:
    echo "unknown entry: ", entryId
2020-06-01 13:47:43 +02:00
template getStrVal*(js: JsonNode; default=""): string =
  ## Reads the "string_value" field of `js` as a string, passing `default`
  ## on to `getStr` as its fallback.
  getStr(js{"string_value"}, default)
proc getImageStr*(js: JsonNode): string =
  ## Returns the string value of `js` with a leading `https` prefix and then
  ## a leading `twimg` prefix removed, where present.
  var url = js.getStr
  url.removePrefix(https)
  url.removePrefix(twimg)
  url
template getImageVal*(js: JsonNode): string =
  ## Applies `getImageStr` to the node found under image_value -> url.
  getImageStr(js{"image_value", "url"})
2020-06-01 02:16:24 +02:00
proc getCardUrl*(js: JsonNode; kind: CardKind): string =
  ## Picks the url for a card: "website_url" by default, or "thank_you_url"
  ## (falling back to the website url) for `promoVideoConvo` cards. Urls
  ## starting with "card://" are replaced by "".
  let website = js{"website_url"}.getStrVal
  let url =
    if kind == promoVideoConvo: js{"thank_you_url"}.getStrVal(website)
    else: website

  if url.startsWith("card://"): ""
  else: url
2020-06-01 02:16:24 +02:00
proc getCardDomain*(js: JsonNode; kind: CardKind): string =
  ## Picks the domain shown for a card: "vanity_url", falling back to the
  ## plain "domain" string. For `promoVideoConvo` cards
  ## "thank_you_vanity_url" takes precedence over that result.
  let domain = js{"vanity_url"}.getStrVal(js{"domain"}.getStr)
  if kind == promoVideoConvo:
    js{"thank_you_vanity_url"}.getStrVal(domain)
  else:
    domain
proc getCardTitle*(js: JsonNode; kind: CardKind): string =
  ## Picks the title for a card depending on its kind:
  ##
  ## * `promoVideoConvo`: "thank_you_text", falling back to "title".
  ## * `liveEvent`: "event_category".
  ## * `videoDirectMessage` / `imageDirectMessage`: "cta1".
  ## * any other kind: "title".
  let title = js{"title"}.getStrVal
  case kind
  of promoVideoConvo:
    js{"thank_you_text"}.getStrVal(title)
  of liveEvent:
    js{"event_category"}.getStrVal
  of videoDirectMessage, imageDirectMessage:
    js{"cta1"}.getStrVal
  else:
    title
2020-06-01 02:16:24 +02:00
proc getBanner*(js: JsonNode): string =
  ## Chooses a banner for a profile, trying in order:
  ##
  ## 1. "profile_banner_url" (via `getImageStr`) with "/1500x500" appended;
  ## 2. '#' followed by "profile_link_color";
  ## 3. a "#RRGGBB" color built from the "rgb" entry of the first palette
  ##    element under profile_image_extensions -> mediaColor -> r -> ok;
  ## 4. the constant "#161616".
  let banner = js{"profile_banner_url"}.getImageStr
  if banner.len != 0:
    return banner & "/1500x500"

  let linkColor = js{"profile_link_color"}.getStr
  if linkColor.len != 0:
    return "#" & linkColor

  # use the primary color from the profile picture's color histogram
  with pal, js{"profile_image_extensions", "mediaColor", "r", "ok", "palette"}:
    if pal.len > 0:
      let rgb = pal[0]{"rgb"}
      return "#" &
        toHex(rgb{"red"}.getInt, 2) &
        toHex(rgb{"green"}.getInt, 2) &
        toHex(rgb{"blue"}.getInt, 2)

  "#161616"
proc getTombstone*(js: JsonNode): string =
  ## Returns the text under tombstoneInfo -> richText -> text with a
  ## trailing " Learn more" removed, where present.
  var text = js{"tombstoneInfo", "richText", "text"}.getStr
  text.removeSuffix(" Learn more")
  text
2020-06-01 02:16:24 +02:00
template getSlice(text: string; slice: seq[int]): string =
  ## Extracts the runes of `text` starting at rune index `slice[0]`, taking
  ## `slice[1] - slice[0]` runes.
  runeSubStr(text, slice[0], slice[1] - slice[0])
proc getSlice(text: string; js: JsonNode): string =
  ## Extracts the rune range described by the JSON array `js` (its first
  ## two elements are read as integers) from `text`. When `js` is not an
  ## array, has fewer than two elements, or its first element is not an
  ## integer, `text` is returned unchanged.
  let usable = js.kind == JArray and js.len >= 2 and js[0].kind == JInt
  if not usable:
    return text

  let bounds = @[js{0}.getInt, js{1}.getInt]
  result = text.getSlice(bounds)
proc expandUrl(text: var string; js: JsonNode; tLen: int; hideTwitter=false) =
  ## Rewrites the short url of one url entity inside `text`.
  ##
  ## `js` supplies "url" (the text to look for), "expanded_url" and
  ## "indices". Nothing happens when "url" is empty or not found in `text`.
  ##
  ## When `hideTwitter` is set, the entity's end index is at or beyond
  ## `tLen`, and the expanded url satisfies `isTwitterUrl`, every occurrence
  ## of the short url is removed, followed by one trailing space and one
  ## trailing newline if present. Otherwise every occurrence is replaced by
  ## an `<a>` element whose text is `shortLink(url)` and whose href is the
  ## expanded url.
  let u = js{"url"}.getStr
  if u.len == 0 or u notin text:
    return

  let
    url = js{"expanded_url"}.getStr
    indices = js{"indices"}
    # A missing or truncated "indices" array must not raise here; fall back
    # to 0, the same value the null-safe accessors yield.
    slice =
      if indices.kind == JArray and indices.len > 1: indices[1].getInt
      else: 0

  if hideTwitter and slice >= tLen and url.isTwitterUrl:
    text = text.replace(u, "")
    text.removeSuffix(' ')
    text.removeSuffix('\n')
  else:
    text = text.replace(u, a(shortLink(url), href=url))
proc expandMention(text: var string; orig: string; js: JsonNode) =
  ## Replaces a mention in `text` with a profile link.
  ##
  ## The mention text is cut out of `orig` using the entity's "indices";
  ## every occurrence of it in `text` becomes an `<a>` element pointing to
  ## "/<screen_name>" with the entity's "name" as its title.
  let
    mentionText = orig.getSlice(js{"indices"})
    profilePath = "/" & js{"screen_name"}.getStr
    displayName = js{"name"}.getStr
    link = a(mentionText, href=profilePath, title=displayName)
  text = text.replace(mentionText, link)
proc expandProfileEntities*(profile: var Profile; js: JsonNode) =
  ## Fills in `profile.website` and turns urls, @mentions and tags in
  ## `profile.bio` into HTML links, using the "entities" object of `js`.
  ##
  ## Returns early, leaving `profile` untouched, when "entities" is null.
  ## Also returns before the final mention pass when "user_mentions" is
  ## null; the url and regex replacements have been applied by then.
  let
    orig = profile.bio
    ent = ? js{"entities"}

  with urls, ent{"url", "urls"}:
    # the array can be present but empty; indexing [0] would then raise
    if urls.kind == JArray and urls.len > 0:
      profile.website = urls[0]{"expanded_url"}.getStr

  with urls, ent{"description", "urls"}:
    for u in urls:
      profile.bio.expandUrl(u, orig.high)

  # link @mentions first, then # / $ tags
  profile.bio = profile.bio.replace(unRegex, unReplace)
  profile.bio = profile.bio.replace(htRegex, htReplace)

  for mention in ? ent{"user_mentions"}:
    profile.bio.expandMention(orig, mention)
proc expandTweetEntities*(tweet: Tweet; js: JsonNode) =
  ## Trims `tweet.text` to its "display_text_range" and turns urls, media
  ## links, tags and mentions into HTML, while maintaining `tweet.reply`.
  ##
  ## Steps, in order:
  ## 1. cut `tweet.text` down to the display range;
  ## 2. if the tweet has a reply id, add "in_reply_to_screen_name" to
  ##    `tweet.reply`;
  ## 3. return if "entities" is null;
  ## 4. expand url entities (and swap the card url for its expanded form
  ##    when it matches) and media entities;
  ## 5. link tags when "hashtags" or "symbols" is present;
  ## 6. for each mention: one that starts inside the display range is linked
  ##    and dropped from `tweet.reply` unless it is the replied-to name; one
  ##    that starts before it is added to `tweet.reply` if the tweet is a
  ##    reply and the name is not listed yet.
  let
    fullText = tweet.text
    displayRange = js{"display_text_range"}
    bounds = @[displayRange{0}.getInt, displayRange{1}.getInt]
    quotes = js{"is_quote_status"}.getBool
    withCard = tweet.card.isSome

  tweet.text = fullText.getSlice(bounds)

  var replyTo = ""
  if tweet.replyId != 0:
    with reply, js{"in_reply_to_screen_name"}:
      tweet.reply.add reply.getStr
      replyTo = reply.getStr

  let entities = ? js{"entities"}

  with urls, entities{"urls"}:
    for entry in urls:
      tweet.text.expandUrl(entry, bounds[1], quotes)
      if withCard and entry{"url"}.getStr == get(tweet.card).url:
        get(tweet.card).url = entry{"expanded_url"}.getStr

  with media, entities{"media"}:
    for entry in media:
      tweet.text.expandUrl(entry, bounds[1], hideTwitter=true)

  if entities.contains("hashtags") or entities.contains("symbols"):
    tweet.text = tweet.text.replace(htRegex, htReplace)

  for mention in ? entities{"user_mentions"}:
    let
      screenName = mention{"screen_name"}.getStr
      replyIdx = tweet.reply.find(screenName)
      inDisplayRange = mention{"indices"}[0].getInt >= bounds[0]

    if inDisplayRange:
      tweet.text.expandMention(fullText, mention)
      if replyIdx > -1 and screenName != replyTo:
        tweet.reply.delete replyIdx
    elif replyIdx == -1 and tweet.replyId != 0:
      tweet.reply.add screenName