From cdf49dcdddf25b746550dd7dc3602cbfff6178dc Mon Sep 17 00:00:00 2001
From: Zed
Date: Sun, 16 Jan 2022 06:00:11 +0100
Subject: [PATCH] Add experimental user parser

---
Summary (text in this section is not part of the commit message):
  * api.nim: getProfile/getProfileById fetch the response text with
    fetchRaw and decode it with the new experimental parseUser.
  * apiutils.nim: the request logic of fetch moves into the fetchImpl
    template, shared by fetch (packedjson node) and the new fetchRaw
    (response text); rate-limit bookkeeping moves into updateToken.
  * experimental/: new slices, user and utils parser modules, plus the
    common and user type modules the JSON is decoded into.

Review changes (line counts are untouched, so the hunk headers and the
diffstat below still hold; the blob ids on the "index" lines predate
these edits):
  * slices.nim: cmp compared x's start with y's end; it now compares
    start indices, giving sort a consistent ordering.
  * parser/user.nim: unReplace/htReplace reproduced the match verbatim,
    which made both replacef calls no-ops; they now emit links.
  * apiutils.nim: fetch reuses its `error` local when logging.
  * NOTE(review): the From: header carries no address - confirm before
    feeding this to git am.

 config.nims                        |  1 +
 src/api.nim                        |  9 ++--
 src/apiutils.nim                   | 74 ++++++++++++++++++++----------
 src/experimental/parser/slices.nim | 67 +++++++++++++++++++++++++++
 src/experimental/parser/user.nim   | 68 +++++++++++++++++++++++++++
 src/experimental/parser/utils.nim  | 22 +++++++++
 src/experimental/types/common.nim  | 30 ++++++++++++
 src/experimental/types/user.nim    | 28 +++++++++++
 8 files changed, 270 insertions(+), 29 deletions(-)
 create mode 100644 src/experimental/parser/slices.nim
 create mode 100644 src/experimental/parser/user.nim
 create mode 100644 src/experimental/parser/utils.nim
 create mode 100644 src/experimental/types/common.nim
 create mode 100644 src/experimental/types/user.nim

diff --git a/config.nims b/config.nims
index b74d70e..ee77289 100644
--- a/config.nims
+++ b/config.nims
@@ -7,6 +7,7 @@
 # disable annoying warnings
 warning("GcUnsafe2", off)
 hint("XDeclaredButNotUsed", off)
+hint("XCannotRaiseY", off)
 hint("User", off)
 
 const
diff --git a/src/api.nim b/src/api.nim
index 9a6e70a..50771b7 100644
--- a/src/api.nim
+++ b/src/api.nim
@@ -2,6 +2,7 @@
 import asyncdispatch, httpclient, uri, strutils
 import packedjson
 import types, query, formatters, consts, apiutils, parser
+import experimental/parser/user
 
 proc getGraphListBySlug*(name, list: string): Future[List] {.async.} =
   let
@@ -32,14 +33,14 @@ proc getListMembers*(list: List; after=""): Future[Result[Profile]] {.async.} =
 proc getProfile*(username: string): Future[Profile] {.async.} =
   let
     ps = genParams({"screen_name": username})
-    js = await fetch(userShow ? ps, Api.userShow)
-  result = parseUserShow(js, username=username)
+    json = await fetchRaw(userShow ? ps, Api.userShow)  # response text, not a JsonNode
+  result = parseUser(json)
 
 proc getProfileById*(userId: string): Future[Profile] {.async.} =
   let
     ps = genParams({"user_id": userId})
-    js = await fetch(userShow ? ps, Api.userShow)
-  result = parseUserShow(js, id=userId)
+    json = await fetchRaw(userShow ? ps, Api.userShow)  # response text, not a JsonNode
+  result = parseUser(json)
 
 proc getTimeline*(id: string; after=""; replies=false): Future[Timeline] {.async.} =
   let
diff --git a/src/apiutils.nim b/src/apiutils.nim
index e287dc5..85d789b 100644
--- a/src/apiutils.nim
+++ b/src/apiutils.nim
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: AGPL-3.0-only
-import httpclient, asyncdispatch, options, times, strutils, uri
-import packedjson, zippy
+import httpclient, asyncdispatch, options, sequtils, strutils, uri
+import jsony, packedjson, zippy
 import types, tokens, consts, parserutils, http_pool
+from experimental/types/common import Errors, ErrorObj
 
 const
   rlRemaining = "x-rate-limit-remaining"
@@ -40,7 +41,14 @@ proc genHeaders*(token: Token = nil): HttpHeaders =
     "DNT": "1"
   })
 
-proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
+template updateToken() =  # uses `api`, `resp` and `token` from the expansion site
+  if api != Api.search and resp.headers.hasKey(rlRemaining):
+    let
+      remaining = parseInt(resp.headers[rlRemaining])
+      reset = parseInt(resp.headers[rlReset])
+    token.setRateLimit(api, remaining, reset)
+
+template fetchImpl(result, fetchBody) {.dirty.} =  # request path shared by fetch/fetchRaw
   once:
     pool = HttpPool()
 
@@ -48,37 +56,21 @@ proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
   if token.tok.len == 0:
     raise rateLimitError()
 
-  let headers = genHeaders(token)
   try:
     var resp: AsyncResponse
-    var body = pool.use(headers):
+    result = pool.use(genHeaders(token)):  # `result` is the variable named by the caller
       resp = await c.get($url)
       await resp.body
 
-    if body.len > 0:
+    if result.len > 0:
       if resp.headers.getOrDefault("content-encoding") == "gzip":
-        body = uncompress(body, dfGzip)
+        result = uncompress(result, dfGzip)
       else:
-        echo "non-gzip body, url: ", url, ", body: ", body
+        echo "non-gzip body, url: ", url, ", body: ", result
 
-    if body.startsWith('{') or body.startsWith('['):
-      result = parseJson(body)
-    else:
-      echo resp.status, ": ", body
-      result = newJNull()
+    fetchBody  # caller's handling of the body; may raise before the release below
 
-    if api != Api.search and resp.headers.hasKey(rlRemaining):
-      let
-        remaining = parseInt(resp.headers[rlRemaining])
-        reset = parseInt(resp.headers[rlReset])
-      token.setRateLimit(api, remaining, reset)
-
-    if result.getError notin {invalidToken, forbidden, badToken}:
-      release(token, used=true)
-    else:
-      echo "fetch error: ", result.getError
-      release(token, invalid=true)
-      raise rateLimitError()
+    release(token, used=true)
 
     if resp.status == $Http400:
       raise newException(InternalError, $url)
@@ -89,3 +81,35 @@ proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =
     if "length" notin e.msg and "descriptor" notin e.msg:
       release(token, invalid=true)
     raise rateLimitError()
+
+proc fetch*(url: Uri; api: Api): Future[JsonNode] {.async.} =  # body parsed with packedjson
+  var body: string
+  fetchImpl body:
+    if body.startsWith('{') or body.startsWith('['):
+      result = parseJson(body)
+    else:
+      echo resp.status, ": ", body
+      result = newJNull()  # body is not JSON
+
+    updateToken()
+
+    let error = result.getError
+    if error in {invalidToken, forbidden, badToken}:
+      echo "fetch error: ", error
+      release(token, invalid=true)
+      raise rateLimitError()
+
+proc fetchRaw*(url: Uri; api: Api): Future[string] {.async.} =  # body returned as text
+  fetchImpl result:
+    if not (result.startsWith('{') or result.startsWith('[')):
+      echo resp.status, ": ", result
+      result.setLen(0)  # a non-JSON body is returned as ""
+
+    updateToken()
+
+    if result.startsWith("{\"errors"):
+      let errors = result.fromJson(Errors).errors
+      if errors.anyIt(it.code in {invalidToken, forbidden, badToken}):
+        echo "fetch error: ", errors
+        release(token, invalid=true)
+        raise rateLimitError()
diff --git a/src/experimental/parser/slices.nim b/src/experimental/parser/slices.nim
new file mode 100644
index 0000000..45e6e1d
--- /dev/null
+++ b/src/experimental/parser/slices.nim
@@ -0,0 +1,67 @@
+import std/[macros, htmlgen, unicode]
+import ../types/common
+import ".."/../[formatters, utils]
+
+type
+  ReplaceSliceKind = enum
+    rkRemove, rkUrl, rkHashtag, rkMention
+
+  ReplaceSlice* = object
+    slice: Slice[int]  # rune range of the text that gets replaced
+    kind: ReplaceSliceKind
+    url, display: string
+
+proc cmp*(x, y: ReplaceSlice): int = cmp(x.slice.a, y.slice.a)  # order by start index
+
+proc dedupSlices*(s: var seq[ReplaceSlice]) =  # drops later slices that share a start index
+  var
+    len = s.len
+    i = 0
+  while i < len:
+    var j = i + 1
+    while j < len:
+      if s[i].slice.a == s[j].slice.a:
+        s.del j  # del moves the last element into j, so j is checked again
+        dec len
+      else:
+        inc j
+    inc i
+
+proc extractUrls*(result: var seq[ReplaceSlice]; url: Url;
+                  textLen: int; hideTwitter = false) =  # appends at most one slice for url
+  let
+    link = url.expandedUrl
+    slice = url.indices[0] ..< url.indices[1]  # indices[1] is treated as exclusive
+
+  if hideTwitter and slice.b.succ >= textLen and link.isTwitterUrl:
+    if slice.a < textLen:
+      result.add ReplaceSlice(kind: rkRemove, slice: slice)
+  else:
+    result.add ReplaceSlice(kind: rkUrl, url: link,
+                            display: link.shortLink, slice: slice)
+
+proc replacedWith*(runes: seq[Rune]; repls: openArray[ReplaceSlice];
+                   textSlice: Slice[int]): string =  # repls are walked in the given order
+  template extractLowerBound(i: int; idx): int =  # end of repls[idx] + 1, or the text start
+    if i > 0: repls[idx].slice.b.succ else: textSlice.a
+
+  result = newStringOfCap(runes.len)
+
+  for i, rep in repls:
+    result.add $runes[extractLowerBound(i, i - 1) ..< rep.slice.a]  # text before this slice
+    case rep.kind
+    of rkHashtag:
+      let
+        name = $runes[rep.slice.a.succ .. rep.slice.b]
+        symbol = $runes[rep.slice.a]
+      result.add a(symbol & name, href = "/search?q=%23" & name)
+    of rkMention:
+      result.add a($runes[rep.slice], href = rep.url, title = rep.display)
+    of rkUrl:
+      result.add a(rep.display, href = rep.url)
+    of rkRemove:
+      discard
+
+  let rest = extractLowerBound(repls.len, ^1) ..< textSlice.b  # text after the last slice
+  if rest.a <= rest.b:
+    result.add $runes[rest]
diff --git a/src/experimental/parser/user.nim b/src/experimental/parser/user.nim
new file mode 100644
index 0000000..8a77aca
--- /dev/null
+++ b/src/experimental/parser/user.nim
@@ -0,0 +1,68 @@
+import std/[algorithm, unicode, re, strutils]
+import jsony
+import utils, slices
+import ../types/user as userType
+from ../../types import Profile, Error
+
+let
+  unRegex = re"(^|[^A-z0-9-_./?])@([A-z0-9_]{1,15})"
+  unReplace = "$1<a href=\"/$2\">@$2</a>"  # $2 is the username without the '@'
+
+  htRegex = re"(^|[^\w-_./?])([##$])([\w_]+)"  # NOTE(review): doubled '#' may be a lost non-ASCII char
+  htReplace = "$1<a href=\"/search?q=%23$3\">$2$3</a>"  # $2 is the symbol, $3 the tag
+
+proc expandProfileEntities(profile: var Profile; user: User) =  # sets website and links the bio
+  let
+    orig = profile.bio.toRunes
+    ent = user.entities
+
+  if ent.url.urls.len > 0:
+    profile.website = ent.url.urls[0].expandedUrl
+
+  var replacements = newSeq[ReplaceSlice]()
+
+  for u in ent.description.urls:
+    replacements.extractUrls(u, orig.high)  # textLen is only read when hideTwitter is set
+
+  replacements.dedupSlices
+  replacements.sort(cmp)  # replacedWith consumes the slices in order
+
+  profile.bio = orig.replacedWith(replacements, 0 .. orig.len)
+                    .replacef(unRegex, unReplace)
+                    .replacef(htRegex, htReplace)
+
+proc getBanner(user: User): string =  # banner URL, else "#" & link color, else "#161616"
+  if user.profileBannerUrl.len > 0:
+    return user.profileBannerUrl & "/1500x500"
+  if user.profileLinkColor.len > 0:
+    return '#' & user.profileLinkColor
+  return "#161616"
+
+proc parseUser*(json: string): Profile =  # decodes the response text into a Profile
+  handleErrors:  # body runs only when json starts with {"errors
+    case error
+    of suspended: return Profile(suspended: true)
+    of userNotFound: return  # default-initialized Profile
+    else: echo "[error - parseUser]: ", error
+
+  let user = json.fromJson(User)
+
+  result = Profile(
+    id: user.idStr,
+    username: user.screenName,
+    fullname: user.name,
+    location: user.location,
+    bio: user.description,
+    following: user.friendsCount,
+    followers: user.followersCount,
+    tweets: user.statusesCount,
+    likes: user.favouritesCount,
+    media: user.mediaCount,
+    verified: user.verified,
+    protected: user.protected,
+    joinDate: parseTwitterDate(user.createdAt),
+    banner: getBanner(user),
+    userPic: getImageUrl(user.profileImageUrlHttps).replace("_normal", "")
+  )
+
+  result.expandProfileEntities(user)
diff --git a/src/experimental/parser/utils.nim b/src/experimental/parser/utils.nim
new file mode 100644
index 0000000..1614093
--- /dev/null
+++ b/src/experimental/parser/utils.nim
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+import std/[sugar, strutils, times]
+import ../types/common
+import ../../utils as uutils
+
+template parseTime(time: string; f: static string; flen: int): DateTime =
+  if time.len != flen: return  # leaves the *calling* proc with a default DateTime
+  parse(time, f, utc())
+
+proc parseIsoDate*(date: string): DateTime =  # input must be exactly 20 characters long
+  date.parseTime("yyyy-MM-dd\'T\'HH:mm:ss\'Z\'", 20)
+
+proc parseTwitterDate*(date: string): DateTime =  # input must be exactly 30 characters long
+  date.parseTime("ddd MMM dd hh:mm:ss \'+0000\' yyyy", 30)
+
+proc getImageUrl*(url: string): string =  # copy of url without the twimg/https prefixes
+  url.dup(removePrefix(twimg), removePrefix(https))
+
+template handleErrors*(body) =  # expects a string named `json` at the expansion site
+  if json.startsWith("{\"errors"):
+    let error {.inject.} = json.fromJson(Errors).errors[0].code  # `error` is visible in body
+    body
diff --git a/src/experimental/types/common.nim b/src/experimental/types/common.nim
new file mode 100644
index 0000000..1d3b30b
--- /dev/null
+++ b/src/experimental/types/common.nim
@@ -0,0 +1,30 @@
+from ../../types import Error
+
+type
+  Url* = object
+    url*: string
+    expandedUrl*: string
+    displayUrl*: string
+    indices*: array[2, int]  # read by extractUrls as indices[0] ..< indices[1]
+
+  ErrorCode* = enum
+    null = 0
+    noUserMatches = 17
+    protectedUser = 22
+    couldntAuth = 32
+    doesntExist = 34
+    userNotFound = 50
+    suspended = 63
+    rateLimited = 88
+    invalidToken = 89
+    listIdOrSlug = 112
+    forbidden = 200
+    badToken = 239
+    noCsrf = 353
+
+  ErrorObj* = object
+    code*: Error  # NOTE(review): typed as types.Error, not ErrorCode above - confirm intended
+    message*: string
+
+  Errors* = object
+    errors*: seq[ErrorObj]
diff --git a/src/experimental/types/user.nim b/src/experimental/types/user.nim
new file mode 100644
index 0000000..1f31318
--- /dev/null
+++ b/src/experimental/types/user.nim
@@ -0,0 +1,28 @@
+import common
+
+type
+  User* = object  # decode target of json.fromJson(User) in parser/user.nim
+    idStr*: string
+    name*: string
+    screenName*: string
+    location*: string
+    description*: string
+    entities*: Entities
+    createdAt*: string  # passed to parseTwitterDate by parseUser
+    followersCount*: int
+    friendsCount*: int
+    favouritesCount*: int
+    statusesCount*: int
+    mediaCount*: int
+    verified*: bool
+    protected*: bool
+    profileBannerUrl*: string
+    profileImageUrlHttps*: string
+    profileLinkColor*: string
+
+  Entities* = object
+    url*: Urls  # first entry becomes Profile.website
+    description*: Urls  # URLs found in the bio text
+
+  Urls* = object
+    urls*: seq[Url]