From bafe5fb550eb59618e9c816c0ad392c513caac0d Mon Sep 17 00:00:00 2001 From: Zed Date: Wed, 2 Oct 2019 10:13:17 +0200 Subject: [PATCH] Refactor header code --- src/api/list.nim | 40 +++++----------------- src/api/media.nim | 79 +++++++++++++------------------------------- src/api/profile.nim | 25 +++++--------- src/api/search.nim | 10 ++---- src/api/timeline.nim | 17 ++-------- src/api/tweet.nim | 16 +++------ src/api/utils.nim | 30 ++++++++++++++--- src/cache.nim | 2 +- tests/test_card.py | 11 ++---- 9 files changed, 79 insertions(+), 151 deletions(-) diff --git a/src/api/list.nim b/src/api/list.nim index f140d48..e28ea8f 100644 --- a/src/api/list.nim +++ b/src/api/list.nim @@ -7,15 +7,6 @@ import utils, consts, timeline, search proc getListTimeline*(username, list, agent, after: string): Future[Timeline] {.async.} = let url = base / (listUrl % [username, list]) - let headers = newHttpHeaders({ - "Accept": jsonAccept, - "Referer": $url, - "User-Agent": agent, - "X-Twitter-Active-User": "yes", - "X-Requested-With": "XMLHttpRequest", - "Accept-Language": lang - }) - var params = toSeq({ "include_available_features": "1", "include_entities": "1", @@ -25,7 +16,7 @@ proc getListTimeline*(username, list, agent, after: string): Future[Timeline] {. if after.len > 0: params.add {"max_position": after} - let json = await fetchJson(url ? params, headers) + let json = await fetchJson(url ? params, genHeaders(agent, url)) result = await finishTimeline(json, Query(), after, agent) if result.content.len == 0: return @@ -36,16 +27,10 @@ proc getListTimeline*(username, list, agent, after: string): Future[Timeline] {. else: get(last.retweet).id proc getListMembers*(username, list, agent: string): Future[Result[Profile]] {.async.} = - let url = base / (listMembersUrl % [username, list]) - - let headers = newHttpHeaders({ - "Accept": htmlAccept, - "Referer": $(base / &"{username}/lists/{list}/members"), - "User-Agent": agent, - "Accept-Language": lang - }) - - let html = await fetchHtml(url, headers) + let + url = base / (listMembersUrl % [username, list]) + referer = base / &"{username}/lists/{list}/members" + html = await fetchHtml(url, genHeaders(agent, referer)) result = Result[Profile]( minId: html.selectAttr(".stream-container", "data-min-position"), @@ -56,17 +41,10 @@ proc getListMembers*(username, list, agent: string): Future[Result[Profile]] {.a ) proc getListMembersSearch*(username, list, agent, after: string): Future[Result[Profile]] {.async.} = - let url = base / ((listMembersUrl & "/timeline") % [username, list]) - - let headers = newHttpHeaders({ - "Accept": jsonAccept, - "Referer": $(base / &"{username}/lists/{list}/members"), - "User-Agent": agent, - "X-Twitter-Active-User": "yes", - "X-Requested-With": "XMLHttpRequest", - "X-Push-With": "XMLHttpRequest", - "Accept-Language": lang - }) + let + url = base / ((listMembersUrl & "/timeline") % [username, list]) + referer = base / &"{username}/lists/{list}/members" + headers = genHeaders({"x-push-with": "XMLHttpRequest"}, agent, referer, xml=true) var params = toSeq({ "include_available_features": "1", diff --git a/src/api/media.nim b/src/api/media.nim index d66289c..c230dde 100644 --- a/src/api/media.nim +++ b/src/api/media.nim @@ -51,35 +51,23 @@ proc getGuestToken(agent: string; force=false): Future[string] {.async.} = tokenUpdated = getTime() tokenUses = 0 - let headers = newHttpHeaders({ - "Accept": jsonAccept, - "Referer": $base, - "User-Agent": agent, - "Authorization": auth - }) - - newClient() - let + headers = genHeaders({"authorization": auth}, agent, base, lang=false) url = apiBase / tokenUrl - json = parseJson(await client.postContent($url)) + json = await fetchJson(url, headers) - result = json["guest_token"].to(string) - guestToken = result + if json != nil: + result = json["guest_token"].to(string) + guestToken = result proc getVideoFetch(tweet: Tweet; agent, token: string) {.async.} = if tweet.video.isNone(): return - let headers = newHttpHeaders({ - "Accept": jsonAccept, - "Referer": $(base / getLink(tweet)), - "User-Agent": agent, - "Authorization": auth, - "x-guest-token": token - }) - - let url = apiBase / (videoUrl % tweet.id) - let json = await fetchJson(url, headers) + let + headers = genHeaders({"authorization": auth, "x-guest-token": token}, + agent, base / getLink(tweet), lang=false) + url = apiBase / (videoUrl % tweet.id) + json = await fetchJson(url, headers) if json == nil: if getTime() - tokenUpdated > initDuration(seconds=1): @@ -114,52 +102,31 @@ proc getVideo*(tweet: Tweet; agent, token: string; force=false) {.async.} = proc getPoll*(tweet: Tweet; agent: string) {.async.} = if tweet.poll.isNone(): return - let headers = newHttpHeaders({ - "Accept": htmlAccept, - "Referer": $(base / getLink(tweet)), - "User-Agent": agent, - "Authority": "twitter.com", - "Accept-Language": lang, - }) + let + headers = genHeaders(agent, base / getLink(tweet), auth=true) + url = base / (pollUrl % tweet.id) + html = await fetchHtml(url, headers) - let url = base / (pollUrl % tweet.id) - let html = await fetchHtml(url, headers) if html == nil: return - tweet.poll = some parsePoll(html) proc getCard*(tweet: Tweet; agent: string) {.async.} = if tweet.card.isNone(): return - let headers = newHttpHeaders({ - "Accept": htmlAccept, - "Referer": $(base / getLink(tweet)), - "User-Agent": agent, - "Authority": "twitter.com", - "Accept-Language": lang, - }) + let + headers = genHeaders(agent, base / getLink(tweet), auth=true) + query = get(tweet.card).query.replace("sensitive=true", "sensitive=false") + html = await fetchHtml(base / query, headers) - let query = get(tweet.card).query.replace("sensitive=true", "sensitive=false") - let html = await fetchHtml(base / query, headers) if html == nil: return - parseCard(get(tweet.card), html) proc getPhotoRail*(username, agent: string): Future[seq[GalleryPhoto]] {.async.} = - let headers = newHttpHeaders({ - "Accept": jsonAccept, - "Referer": $(base / username), - "User-Agent": agent, - "X-Requested-With": "XMLHttpRequest" - }) - - let params = { - "for_photo_rail": "true", - "oldest_unread_id": "0" - } - - let url = base / (timelineMediaUrl % username) ? params - let html = await fetchHtml(url, headers, jsonKey="items_html") + let + headers = genHeaders({"x-requested-with": "XMLHttpRequest"}, agent, base / username) + params = {"for_photo_rail": "true", "oldest_unread_id": "0"} + url = base / (timelineMediaUrl % username) ? params + html = await fetchHtml(url, headers, jsonKey="items_html") result = parsePhotoRail(html) diff --git a/src/api/profile.nim b/src/api/profile.nim index 363f727..f66337f 100644 --- a/src/api/profile.nim +++ b/src/api/profile.nim @@ -12,21 +12,15 @@ proc getProfileFallback(username: string; headers: HttpHeaders): Future[Profile] result = parseIntentProfile(html) proc getProfile*(username, agent: string): Future[Profile] {.async.} = - let headers = newHttpHeaders({ - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9", - "Referer": $(base / username), - "User-Agent": agent, - "X-Twitter-Active-User": "yes", - "X-Requested-With": "XMLHttpRequest", - "Accept-Language": lang - }) - let + headers = genHeaders(agent, base / username, xml=true) + params = { "screen_name": username, "wants_hovercard": "true", "_": $(epochTime().int) } + url = base / profilePopupUrl ? params html = await fetchHtml(url, headers, jsonKey="html") @@ -37,14 +31,11 @@ proc getProfile*(username, agent: string): Future[Profile] {.async.} = result = parsePopupProfile(html) -proc getProfileFull*(username: string): Future[Profile] {.async.} = - let headers = newHttpHeaders({ - "authority": "twitter.com", - "accept": htmlAccept, - "referer": "https://twitter.com/" & username, - "accept-language": lang - }) +proc getProfileFull*(username, agent: string): Future[Profile] {.async.} = + let + url = base / username + headers = genHeaders(agent, url, auth=true) + html = await fetchHtml(url, headers) - let html = await fetchHtml(base / username, headers) if html == nil: return result = parseTimelineProfile(html) diff --git a/src/api/search.nim b/src/api/search.nim index 5d15383..379244b 100644 --- a/src/api/search.nim +++ b/src/api/search.nim @@ -22,14 +22,8 @@ proc getSearch*[T](query: Query; after, agent: string): Future[Result[T]] {.asyn param = genQueryParam(query) encoded = encodeUrl(param, usePlus=false) - headers = newHttpHeaders({ - "Accept": jsonAccept, - "Referer": $(base / ("search?f=$1&q=$2&src=typd" % [kind, encoded])), - "User-Agent": agent, - "X-Requested-With": "XMLHttpRequest", - "Authority": "twitter.com", - "Accept-Language": lang - }) + referer = base / ("search?f=$1&q=$2&src=typd" % [kind, encoded]) + headers = genHeaders(agent, referer, auth=true, xml=true) params = { "f": kind, diff --git a/src/api/timeline.nim b/src/api/timeline.nim index b2a98d1..3f2f68d 100644 --- a/src/api/timeline.nim +++ b/src/api/timeline.nim @@ -22,14 +22,7 @@ proc finishTimeline*(json: JsonNode; query: Query; after, agent: string): Future result.content = thread.content proc getTimeline*(username, after, agent: string): Future[Timeline] {.async.} = - let headers = newHttpHeaders({ - "Accept": jsonAccept, - "Referer": $(base / username), - "User-Agent": agent, - "X-Twitter-Active-User": "yes", - "X-Requested-With": "XMLHttpRequest", - "Accept-Language": lang - }) + let headers = genHeaders(agent, base / username, xml=true) var params = toSeq({ "include_available_features": "1", @@ -45,18 +38,12 @@ proc getTimeline*(username, after, agent: string): Future[Timeline] {.async.} = result = await finishTimeline(json, Query(), after, agent) proc getProfileAndTimeline*(username, agent, after: string): Future[(Profile, Timeline)] {.async.} = - let headers = newHttpHeaders({ - "authority": "twitter.com", - "accept": htmlAccept, - "referer": "https://twitter.com/" & username, - "accept-language": lang - }) - var url = base / username if after.len > 0: url = url ? {"max_position": after} let + headers = genHeaders(agent, base / username, auth=true) html = await fetchHtml(url, headers) timeline = parseTimeline(html.select("#timeline > .stream-container"), after) profile = parseTimelineProfile(html) diff --git a/src/api/tweet.nim b/src/api/tweet.nim index 8dcf33d..2939100 100644 --- a/src/api/tweet.nim +++ b/src/api/tweet.nim @@ -4,18 +4,12 @@ import ".."/[types, parser] import utils, consts, media proc getTweet*(username, id, after, agent: string): Future[Conversation] {.async.} = - let headers = newHttpHeaders({ - "Accept": jsonAccept, - "Referer": $base, - "User-Agent": agent, - "X-Twitter-Active-User": "yes", - "X-Requested-With": "XMLHttpRequest", - "Accept-Language": lang, - "Pragma": "no-cache", - "X-Previous-Page-Name": "profile" - }) - let + headers = genHeaders({ + "pragma": "no-cache", + "x-previous-page-name": "profile" + }, agent, base, xml=true) + url = base / username / tweetUrl / id ? {"max_position": after} html = await fetchHtml(url, headers) diff --git a/src/api/utils.nim b/src/api/utils.nim index b76c18f..e6df7e3 100644 --- a/src/api/utils.nim +++ b/src/api/utils.nim @@ -1,12 +1,35 @@ import httpclient, asyncdispatch, htmlparser import strutils, json, xmltree, uri +import consts + +proc genHeaders*(headers: openArray[tuple[key: string, val: string]]; + agent: string; referer: Uri; lang=true; + auth=false; xml=false): HttpHeaders = + result = newHttpHeaders({ + "referer": $referer, + "user-agent": agent, + "x-twitter-active-user": "yes", + }) + + if auth: result["authority"] = "twitter.com" + if lang: result["accept-language"] = consts.lang + if xml: result["x-requested-with"] = "XMLHttpRequest" + + for (key, val) in headers: + result[key] = val + +proc genHeaders*(agent: string; referer: Uri; lang=true; + auth=false; xml=false): HttpHeaders = + genHeaders([], agent, referer, lang, auth, xml) + template newClient*() {.dirty.} = var client = newAsyncHttpClient() defer: client.close() client.headers = headers proc fetchHtml*(url: Uri; headers: HttpHeaders; jsonKey = ""): Future[XmlNode] {.async.} = + headers["accept"] = htmlAccept newClient() var resp = "" @@ -16,12 +39,11 @@ proc fetchHtml*(url: Uri; headers: HttpHeaders; jsonKey = ""): Future[XmlNode] { return nil if jsonKey.len > 0: - let json = parseJson(resp)[jsonKey].str - return parseHtml(json) - else: - return parseHtml(resp) + resp = parseJson(resp)[jsonKey].str + return parseHtml(resp) proc fetchJson*(url: Uri; headers: HttpHeaders): Future[JsonNode] {.async.} = + headers["accept"] = jsonAccept newClient() var resp = "" diff --git a/src/cache.nim b/src/cache.nim index f02b568..3aa9aca 100644 --- a/src/cache.nim +++ b/src/cache.nim @@ -39,7 +39,7 @@ proc getCachedProfile*(username, agent: string; force=false): Future[Profile] {. result.getOne("lower(username) = ?", toLower(username)) doAssert not result.isOutdated except AssertionError, KeyError: - result = await getProfileFull(username) + result = await getProfileFull(username, agent) cache(result) proc setProfileCacheTime*(minutes: int) = diff --git a/tests/test_card.py b/tests/test_card.py index c716b3a..3d694ac 100644 --- a/tests/test_card.py +++ b/tests/test_card.py @@ -9,8 +9,8 @@ card = [ 'github.com', False], ['lorenlugosch/status/1115440394148487168', - 'lorenlugosch/pretrain_speech_model', - 'Speech Model Pre-training for End-to-End Spoken Language Understanding - lorenlugosch/pretrain_speech_model', + 'lorenlugosch/end-to-end-SLU', + 'PyTorch code for end-to-end spoken language understanding (SLU) with ASR-based transfer learning - lorenlugosch/end-to-end-SLU', 'github.com', False], ['PyTorch/status/1123379369672450051', @@ -85,12 +85,7 @@ playable = [ ['nim_lang/status/1121090879823986688', 'Nim - First natively compiled language w/ hot code-reloading at...', '#nim #c++ #ACCUConf Nim is a statically typed systems and applications programming language which offers perhaps some of the most powerful metaprogramming ca...', - 'youtube.com'], - - ['lele/status/819930645145288704', - 'Eurocrash presents Open Decks - emerging dj #4: E-Musik', - "OPEN DECKS is Eurocrash's new project about discovering new and emerging dj talents. Every selected dj will have the chance to perform the first dj-set in front of an actual audience. The best dj...", - 'mixcloud.com'] + 'youtube.com'] ] promo = [