From de9e452a7cd214303f8a5bc5b350e348215d8d45 Mon Sep 17 00:00:00 2001 From: Zed Date: Mon, 24 Jun 2019 01:34:30 +0200 Subject: [PATCH] Add parser utils to parserutils.nim --- README.md | 5 ++ src/parser.nim | 122 ++++++++++++++++---------------------------- src/parserutils.nim | 111 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 161 insertions(+), 77 deletions(-) create mode 100644 src/parserutils.nim diff --git a/README.md b/README.md index 144543e..dca76e1 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ Inspired by the [invidio.us](https://github.com/omarroth/invidious) project. - Dark theme ## Installation + ```bash git clone https://github.com/zedeus/nitter cd nitter @@ -23,6 +24,9 @@ like the title, this will change as the project matures a bit. For now the focus is on implementing missing features. ## Todo (roughly in this order) + +- Line connecting posts in threads +- "Show Thread" button - Twitter "Cards" (link previews) - Nitter link previews - Search (+ hashtag search) @@ -39,6 +43,7 @@ is on implementing missing features. - Nitter logo ## Why? + It's basically impossible to use Twitter without JavaScript enabled. If you try, you're redirected to the legacy mobile version which is awful both functionally and aesthetically. For privacy-minded folks, preventing JavaScript analytics and diff --git a/src/parser.nim b/src/parser.nim index af6597c..e2e761d 100644 --- a/src/parser.nim +++ b/src/parser.nim @@ -1,103 +1,71 @@ -import xmltree, sequtils, strtabs, strutils, strformat, json, times -import nimquery, regex +import xmltree, sequtils, strtabs, strutils, strformat +import nimquery -import ./types, ./formatters - -proc getAttr(node: XmlNode; attr: string; default=""): string = - if node.isNil or node.attrs.isNil: return default - return node.attrs.getOrDefault(attr) - -proc selectAttr(node: XmlNode; selector: string; attr: string; default=""): string = - let res = node.querySelector(selector) - if res == nil: "" else: res.getAttr(attr, default) - -proc selectText(node: XmlNode; selector: string): string = - let res = node.querySelector(selector) - result = if res == nil: "" else: res.innerText() +import ./types, ./parserutils proc parsePopupProfile*(node: XmlNode): Profile = let profile = node.querySelector(".profile-card") if profile.isNil: return result = Profile( - fullname: profile.selectText(".fullname").strip(), - username: profile.selectText(".username").strip(chars={'@', ' '}), - description: profile.selectText(".bio"), - verified: profile.selectText(".Icon.Icon--verified").len > 0, - protected: profile.selectText(".Icon.Icon--protected").len > 0, - userpic: profile.selectAttr(".ProfileCard-avatarImage", "src").getUserpic(), - banner: profile.selectAttr("svg > image", "xlink:href").replace("600x200", "1500x500") + fullname: profile.getName(".fullname"), + username: profile.getUsername(".username"), + description: profile.getBio(".bio"), + userpic: profile.getAvatar(".ProfileCard-avatarImage"), + verified: isVerified(profile), + protected: isProtected(profile), + banner: getBanner(profile) ) - - if result.banner.len == 0: - result.banner = profile.selectAttr(".ProfileCard-bg", "style") - - let stats = profile.querySelectorAll(".ProfileCardStats-statLink") - for s in stats: - let text = s.getAttr("title").split(" ")[0] - case s.getAttr("href").split("/")[^1] - of "followers": result.followers = text - of "following": result.following = text - else: result.tweets = text + result.getPopupStats(profile) proc parseIntentProfile*(profile: XmlNode): Profile = result = Profile( - fullname: profile.selectText("a.fn.url.alternate-context").strip(), - username: profile.selectText(".nickname").strip(chars={'@', ' '}), - userpic: profile.querySelector(".profile.summary").selectAttr("img.photo", "src").getUserPic(), - description: profile.selectText("p.note").strip(), - verified: not profile.querySelector("li.verified").isNil, - protected: not profile.querySelector("li.protected").isNil, - banner: "background-color: #161616", - tweets: "?" + fullname: profile.getName("a.fn.url.alternate-context"), + username: profile.getUsername(".nickname"), + description: profile.getBio("p.note"), + userpic: profile.querySelector(".profile.summary").getAvatar("img.photo"), + verified: not profile.querySelector("li.verified").isNil, + protected: not profile.querySelector("li.protected").isNil, + banner: getBanner(profile) ) - - for stat in profile.querySelectorAll("dd.count > a"): - case stat.getAttr("href").split("/")[^1] - of "followers": result.followers = stat.innerText() - of "following": result.following = stat.innerText() + result.getIntentStats(profile) proc parseTweetProfile*(profile: XmlNode): Profile = result = Profile( fullname: profile.getAttr("data-name"), username: profile.getAttr("data-screen-name"), - userpic: profile.selectAttr(".avatar", "src").getUserpic(), - verified: profile.selectText(".Icon.Icon--verified").len > 0 + userpic: profile.getAvatar(".avatar"), + verified: isVerified(profile) + ) + +proc parseQuote*(tweet: XmlNode): Tweet = + let tweet = tweet.querySelector(".QuoteTweet-innerContainer") + + result = Tweet( + id: tweet.getAttr("data-item-id"), + link: tweet.getAttr("href"), + text: tweet.selectText(".QuoteTweet-text") + ) + + result.profile = Profile( + fullname: tweet.getAttr("data-screen-name"), + username: tweet.selectText(".QuteTweet-fullname"), + verified: isVerified(tweet) ) proc parseTweet*(tweet: XmlNode): Tweet = - let time = tweet.querySelector(".js-short-timestamp") result = Tweet( - id: tweet.getAttr("data-item-id"), - link: tweet.getAttr("data-permalink-path"), - text: tweet.selectText(".tweet-text").stripTwitterUrls(), - pinned: "pinned" in tweet.getAttr("class"), - profile: parseTweetProfile(tweet), - time: fromUnix(parseInt(time.getAttr("data-time", "0"))), - shortTime: time.innerText(), - replies: "0", - likes: "0", - retweets: "0" + id: tweet.getAttr("data-item-id"), + link: tweet.getAttr("data-permalink-path"), + profile: parseTweetProfile(tweet), + text: getTweetText(tweet), + time: getTimestamp(tweet), + shortTime: getShortTime(tweet), + pinned: "pinned" in tweet.getAttr("class") ) - for action in tweet.querySelectorAll(".ProfileTweet-actionCountForAria"): - let text = action.innerText.split() - case text[1] - of "replies": result.replies = text[0] - of "likes": result.likes = text[0] - of "retweets": result.retweets = text[0] - else: discard - - for photo in tweet.querySelectorAll(".AdaptiveMedia-photoContainer"): - result.photos.add photo.attrs["data-image-url"] - - let player = tweet.selectAttr(".PlayableMedia-player", "style") - if player.len > 0: - let thumb = player.replace(re".+:url\('([^']+)'\)", "$1") - if "tweet_video" in thumb: - result.gif = some(thumb.replace(re".+thumb/([^\.']+)\.jpg.*", "$1")) - else: - result.videoThumb = some(thumb) + result.getTweetStats(tweet) + result.getTweetMedia(tweet) let by = tweet.selectText(".js-retweet-text > a > b") if by.len > 0: diff --git a/src/parserutils.nim b/src/parserutils.nim new file mode 100644 index 0000000..ad9e4d8 --- /dev/null +++ b/src/parserutils.nim @@ -0,0 +1,111 @@ +import xmltree, strtabs, times +import nimquery, regex + +import ./types, ./formatters + +const + thumbRegex = re".+:url\('([^']+)'\)" + gifRegex = re".+thumb/([^\.']+)\.jpg.*" + +proc getAttr*(node: XmlNode; attr: string; default=""): string = + if node.isNil or node.attrs.isNil: return default + return node.attrs.getOrDefault(attr) + +proc selectAttr*(node: XmlNode; selector: string; attr: string; default=""): string = + let res = node.querySelector(selector) + if res == nil: "" else: res.getAttr(attr, default) + +proc selectText*(node: XmlNode; selector: string): string = + let res = node.querySelector(selector) + result = if res == nil: "" else: res.innerText() + +proc isVerified*(profile: XmlNode): bool = + profile.selectText(".Icon.Icon--verified").len > 0 + +proc isProtected*(profile: XmlNode): bool = + profile.selectText(".Icon.Icon--protected").len > 0 + +proc getName*(profile: XmlNode; selector: string): string = + profile.selectText(selector).strip() + +proc getUsername*(profile: XmlNode; selector: string): string = + profile.selectText(selector).strip(chars={'@', ' '}) + +proc getTweetText*(tweet: XmlNode): string = + let selector = ".tweet-text > a.twitter-timeline-link.u-hidden" + let link = tweet.selectAttr(selector, "data-expanded-url") + var text =tweet.selectText(".tweet-text") + + if link.len > 0 and link in text: + text = text.replace(link, " " & link) + + stripTwitterUrls(text) + +proc getTime(tweet: XmlNode): XmlNode = + tweet.querySelector(".js-short-timestamp") + +proc getTimestamp*(tweet: XmlNode): Time = + let time = getTime(tweet).getAttr("data-time", "0") + fromUnix(parseInt(time)) + +proc getShortTime*(tweet: XmlNode): string = + getTime(tweet).innerText() + +proc getBio*(profile: XmlNode; selector: string): string = + profile.selectText(selector).strip() + +proc getAvatar*(profile: XmlNode; selector: string): string = + profile.selectAttr(selector, "src").getUserpic() + +proc getBanner*(tweet: XmlNode): string = + let url = tweet.selectAttr("svg > image", "xlink:href") + + if url.len > 0: + result = url.replace("600x200", "1500x500") + else: + result = tweet.selectAttr(".ProfileCard-bg", "style") + + if result.len == 0: + result = "background-color: #161616" + +proc getPopupStats*(profile: var Profile; node: XmlNode) = + for s in node.querySelectorAll( ".ProfileCardStats-statLink"): + let text = s.getAttr("title").split(" ")[0] + case s.getAttr("href").split("/")[^1] + of "followers": profile.followers = text + of "following": profile.following = text + else: profile.tweets = text + +proc getIntentStats*(profile: var Profile; node: XmlNode) = + profile.tweets = "?" + for s in node.querySelectorAll( "dd.count > a"): + let text = s.innerText() + case s.getAttr("href").split("/")[^1] + of "followers": profile.followers = text + of "following": profile.following = text + +proc getTweetStats*(tweet: var Tweet; node: XmlNode) = + tweet.replies = "0" + tweet.retweets = "0" + tweet.likes = "0" + + for action in node.querySelectorAll(".ProfileTweet-actionCountForAria"): + let text = action.innerText.split() + case text[1] + of "replies": tweet.replies = text[0] + of "likes": tweet.likes = text[0] + of "retweets": tweet.retweets = text[0] + +proc getTweetMedia*(tweet: var Tweet; node: XmlNode) = + for photo in node.querySelectorAll(".AdaptiveMedia-photoContainer"): + tweet.photos.add photo.attrs["data-image-url"] + + let player = node.selectAttr(".PlayableMedia-player", "style") + if player.len == 0: + return + + let thumb = player.replace(thumbRegex, "$1") + if "tweet_video" in thumb: + tweet.gif = some(thumb.replace(gifRegex, "$1")) + else: + tweet.videoThumb = some(thumb)