Add parser utils to parserutils.nim

This commit is contained in:
Zed 2019-06-24 01:34:30 +02:00
parent c559ab1f1a
commit de9e452a7c
3 changed files with 161 additions and 77 deletions

View File

@ -12,6 +12,7 @@ Inspired by the [invidio.us](https://github.com/omarroth/invidious) project.
- Dark theme - Dark theme
## Installation ## Installation
```bash ```bash
git clone https://github.com/zedeus/nitter git clone https://github.com/zedeus/nitter
cd nitter cd nitter
@ -23,6 +24,9 @@ like the title, this will change as the project matures a bit. For now the focus
is on implementing missing features. is on implementing missing features.
## Todo (roughly in this order) ## Todo (roughly in this order)
- Line connecting posts in threads
- "Show Thread" button
- Twitter "Cards" (link previews) - Twitter "Cards" (link previews)
- Nitter link previews - Nitter link previews
- Search (+ hashtag search) - Search (+ hashtag search)
@ -39,6 +43,7 @@ is on implementing missing features.
- Nitter logo - Nitter logo
## Why? ## Why?
It's basically impossible to use Twitter without JavaScript enabled. If you try, It's basically impossible to use Twitter without JavaScript enabled. If you try,
you're redirected to the legacy mobile version which is awful both functionally you're redirected to the legacy mobile version which is awful both functionally
and aesthetically. For privacy-minded folks, preventing JavaScript analytics and and aesthetically. For privacy-minded folks, preventing JavaScript analytics and

View File

@ -1,103 +1,71 @@
import xmltree, sequtils, strtabs, strutils, strformat, json, times import xmltree, sequtils, strtabs, strutils, strformat
import nimquery, regex import nimquery
import ./types, ./formatters import ./types, ./parserutils
proc getAttr(node: XmlNode; attr: string; default=""): string =
if node.isNil or node.attrs.isNil: return default
return node.attrs.getOrDefault(attr)
proc selectAttr(node: XmlNode; selector: string; attr: string; default=""): string =
let res = node.querySelector(selector)
if res == nil: "" else: res.getAttr(attr, default)
proc selectText(node: XmlNode; selector: string): string =
let res = node.querySelector(selector)
result = if res == nil: "" else: res.innerText()
proc parsePopupProfile*(node: XmlNode): Profile = proc parsePopupProfile*(node: XmlNode): Profile =
let profile = node.querySelector(".profile-card") let profile = node.querySelector(".profile-card")
if profile.isNil: return if profile.isNil: return
result = Profile( result = Profile(
fullname: profile.selectText(".fullname").strip(), fullname: profile.getName(".fullname"),
username: profile.selectText(".username").strip(chars={'@', ' '}), username: profile.getUsername(".username"),
description: profile.selectText(".bio"), description: profile.getBio(".bio"),
verified: profile.selectText(".Icon.Icon--verified").len > 0, userpic: profile.getAvatar(".ProfileCard-avatarImage"),
protected: profile.selectText(".Icon.Icon--protected").len > 0, verified: isVerified(profile),
userpic: profile.selectAttr(".ProfileCard-avatarImage", "src").getUserpic(), protected: isProtected(profile),
banner: profile.selectAttr("svg > image", "xlink:href").replace("600x200", "1500x500") banner: getBanner(profile)
) )
result.getPopupStats(profile)
if result.banner.len == 0:
result.banner = profile.selectAttr(".ProfileCard-bg", "style")
let stats = profile.querySelectorAll(".ProfileCardStats-statLink")
for s in stats:
let text = s.getAttr("title").split(" ")[0]
case s.getAttr("href").split("/")[^1]
of "followers": result.followers = text
of "following": result.following = text
else: result.tweets = text
proc parseIntentProfile*(profile: XmlNode): Profile = proc parseIntentProfile*(profile: XmlNode): Profile =
result = Profile( result = Profile(
fullname: profile.selectText("a.fn.url.alternate-context").strip(), fullname: profile.getName("a.fn.url.alternate-context"),
username: profile.selectText(".nickname").strip(chars={'@', ' '}), username: profile.getUsername(".nickname"),
userpic: profile.querySelector(".profile.summary").selectAttr("img.photo", "src").getUserPic(), description: profile.getBio("p.note"),
description: profile.selectText("p.note").strip(), userpic: profile.querySelector(".profile.summary").getAvatar("img.photo"),
verified: not profile.querySelector("li.verified").isNil, verified: not profile.querySelector("li.verified").isNil,
protected: not profile.querySelector("li.protected").isNil, protected: not profile.querySelector("li.protected").isNil,
banner: "background-color: #161616", banner: getBanner(profile)
tweets: "?"
) )
result.getIntentStats(profile)
for stat in profile.querySelectorAll("dd.count > a"):
case stat.getAttr("href").split("/")[^1]
of "followers": result.followers = stat.innerText()
of "following": result.following = stat.innerText()
proc parseTweetProfile*(profile: XmlNode): Profile = proc parseTweetProfile*(profile: XmlNode): Profile =
result = Profile( result = Profile(
fullname: profile.getAttr("data-name"), fullname: profile.getAttr("data-name"),
username: profile.getAttr("data-screen-name"), username: profile.getAttr("data-screen-name"),
userpic: profile.selectAttr(".avatar", "src").getUserpic(), userpic: profile.getAvatar(".avatar"),
verified: profile.selectText(".Icon.Icon--verified").len > 0 verified: isVerified(profile)
)
proc parseQuote*(tweet: XmlNode): Tweet =
  ## Builds a `Tweet` from the quoted-tweet container found inside `tweet`.
  ##
  ## Returns an empty `Tweet` when `tweet` has no
  ## `.QuoteTweet-innerContainer` descendant.
  let quote = tweet.querySelector(".QuoteTweet-innerContainer")
  if quote.isNil:
    # No quote present: bail out before running selectors on a nil node.
    return Tweet()

  result = Tweet(
    id: quote.getAttr("data-item-id"),
    link: quote.getAttr("href"),
    text: quote.selectText(".QuoteTweet-text")
  )

  # The display name is the text of the full-name element, while the handle
  # is carried by the `data-screen-name` attribute. The two sources were
  # previously swapped and the selector was misspelled ("QuteTweet").
  result.profile = Profile(
    fullname: quote.selectText(".QuoteTweet-fullname"),
    username: quote.getAttr("data-screen-name"),
    verified: isVerified(quote)
  )
proc parseTweet*(tweet: XmlNode): Tweet = proc parseTweet*(tweet: XmlNode): Tweet =
let time = tweet.querySelector(".js-short-timestamp")
result = Tweet( result = Tweet(
id: tweet.getAttr("data-item-id"), id: tweet.getAttr("data-item-id"),
link: tweet.getAttr("data-permalink-path"), link: tweet.getAttr("data-permalink-path"),
text: tweet.selectText(".tweet-text").stripTwitterUrls(), profile: parseTweetProfile(tweet),
pinned: "pinned" in tweet.getAttr("class"), text: getTweetText(tweet),
profile: parseTweetProfile(tweet), time: getTimestamp(tweet),
time: fromUnix(parseInt(time.getAttr("data-time", "0"))), shortTime: getShortTime(tweet),
shortTime: time.innerText(), pinned: "pinned" in tweet.getAttr("class")
replies: "0",
likes: "0",
retweets: "0"
) )
for action in tweet.querySelectorAll(".ProfileTweet-actionCountForAria"): result.getTweetStats(tweet)
let text = action.innerText.split() result.getTweetMedia(tweet)
case text[1]
of "replies": result.replies = text[0]
of "likes": result.likes = text[0]
of "retweets": result.retweets = text[0]
else: discard
for photo in tweet.querySelectorAll(".AdaptiveMedia-photoContainer"):
result.photos.add photo.attrs["data-image-url"]
let player = tweet.selectAttr(".PlayableMedia-player", "style")
if player.len > 0:
let thumb = player.replace(re".+:url\('([^']+)'\)", "$1")
if "tweet_video" in thumb:
result.gif = some(thumb.replace(re".+thumb/([^\.']+)\.jpg.*", "$1"))
else:
result.videoThumb = some(thumb)
let by = tweet.selectText(".js-retweet-text > a > b") let by = tweet.selectText(".js-retweet-text > a > b")
if by.len > 0: if by.len > 0:

111
src/parserutils.nim Normal file
View File

@ -0,0 +1,111 @@
import xmltree, strtabs, times
import nimquery, regex
import ./types, ./formatters
# Matches a CSS declaration ending in `:url('...')`; group 1 is the quoted URL.
const thumbRegex = re".+:url\('([^']+)'\)"

# Matches a `.../thumb/<name>.jpg...` URL; group 1 is the `<name>` part.
const gifRegex = re".+thumb/([^\.']+)\.jpg.*"
proc getAttr*(node: XmlNode; attr: string; default=""): string =
  ## Returns the value of attribute `attr` on `node`.
  ##
  ## `default` is returned when `node` is nil, when `node` has no attribute
  ## table, or when the attribute is not present.
  if node.isNil or node.attrs.isNil:
    return default
  # Forward `default` to the lookup; otherwise a missing key silently yields
  # "" even though the caller asked for a different fallback.
  node.attrs.getOrDefault(attr, default)
proc selectAttr*(node: XmlNode; selector: string; attr: string; default=""): string =
  ## Returns attribute `attr` of the first descendant of `node` matching the
  ## CSS `selector`.
  ##
  ## `default` is returned when `node` is nil or when nothing matches the
  ## selector; a matched element is read through `getAttr` with the same
  ## `default`.
  if node.isNil:
    # Callers chain this onto `querySelector` results, which may be nil.
    return default
  let match = node.querySelector(selector)
  # Honour `default` on a miss instead of hard-coding "".
  if match.isNil: default
  else: match.getAttr(attr, default)
proc selectText*(node: XmlNode; selector: string): string =
  ## Returns the inner text of the first descendant of `node` matching the
  ## CSS `selector`, or "" when `node` is nil or nothing matches.
  if node.isNil:
    # Callers pass `querySelector` results straight in; guard before querying.
    return ""
  let match = node.querySelector(selector)
  if match.isNil: ""
  else: match.innerText()
proc isVerified*(profile: XmlNode): bool =
  ## True when the `.Icon.Icon--verified` element under `profile` has
  ## non-empty text.
  let badge = selectText(profile, ".Icon.Icon--verified")
  result = badge.len > 0
proc isProtected*(profile: XmlNode): bool =
  ## True when the `.Icon.Icon--protected` element under `profile` has
  ## non-empty text.
  let badge = selectText(profile, ".Icon.Icon--protected")
  result = badge.len > 0
proc getName*(profile: XmlNode; selector: string): string =
  ## Returns the text of the element matching `selector`, with surrounding
  ## whitespace removed.
  let raw = selectText(profile, selector)
  result = strip(raw)
proc getUsername*(profile: XmlNode; selector: string): string =
  ## Returns the text of the element matching `selector`, with leading and
  ## trailing '@' and space characters removed.
  let raw = selectText(profile, selector)
  result = strip(raw, chars={'@', ' '})
proc getTweetText*(tweet: XmlNode): string =
  ## Returns the `.tweet-text` content of `tweet`, passed through
  ## `stripTwitterUrls`.
  ##
  ## If the hidden timeline link's expanded URL occurs in the text, a space
  ## is inserted in front of each occurrence first.
  const hiddenLink = ".tweet-text > a.twitter-timeline-link.u-hidden"
  let expanded = selectAttr(tweet, hiddenLink, "data-expanded-url")

  result = selectText(tweet, ".tweet-text")
  if expanded.len > 0 and result.contains(expanded):
    result = result.replace(expanded, " " & expanded)
  result = stripTwitterUrls(result)
proc getTime(tweet: XmlNode): XmlNode =
  ## Looks up the `.js-short-timestamp` element inside `tweet`; the result is
  ## whatever `querySelector` yields for that selector.
  result = querySelector(tweet, ".js-short-timestamp")
proc getTimestamp*(tweet: XmlNode): Time =
  ## Returns the tweet's time, read as Unix seconds from the `data-time`
  ## attribute of its `.js-short-timestamp` element.
  ##
  ## Falls back to Unix time 0 when the attribute is absent, empty, or not a
  ## valid integer.
  let raw = getTime(tweet).getAttr("data-time", "0")
  let seconds =
    # An empty or malformed value would otherwise raise ValueError and abort
    # the whole parse; use the same "0" fallback as for a missing element.
    try: parseInt(raw)
    except ValueError: 0
  fromUnix(seconds)
proc getShortTime*(tweet: XmlNode): string =
  ## Returns the inner text of the tweet's `.js-short-timestamp` element, or
  ## "" when the element is not present.
  let stamp = getTime(tweet)
  # `innerText` dereferences its argument, so guard the no-match case.
  if stamp.isNil: ""
  else: stamp.innerText()
proc getBio*(profile: XmlNode; selector: string): string =
  ## Returns the text of the element matching `selector`, with surrounding
  ## whitespace removed.
  let raw = selectText(profile, selector)
  result = strip(raw)
proc getAvatar*(profile: XmlNode; selector: string): string =
  ## Reads the `src` attribute of the element matching `selector` and passes
  ## it through `getUserpic`.
  let src = selectAttr(profile, selector, "src")
  result = getUserpic(src)
proc getBanner*(tweet: XmlNode): string =
  ## Returns the banner for the given node, trying in order:
  ##
  ## 1. the `xlink:href` of `svg > image`, with "600x200" replaced by
  ##    "1500x500";
  ## 2. the `style` attribute of `.ProfileCard-bg`;
  ## 3. a fixed dark background colour declaration.
  let image = selectAttr(tweet, "svg > image", "xlink:href")
  if image.len > 0:
    return image.replace("600x200", "1500x500")

  let style = selectAttr(tweet, ".ProfileCard-bg", "style")
  result =
    if style.len > 0: style
    else: "background-color: #161616"
proc getPopupStats*(profile: var Profile; node: XmlNode) =
  ## Fills `profile`'s follower, following and tweet counts from the
  ## `.ProfileCardStats-statLink` elements under `node`.
  ##
  ## The count is the first space-separated word of each link's `title`; the
  ## last path segment of its `href` decides which field receives it, with
  ## anything other than "followers"/"following" going to `tweets`.
  for link in querySelectorAll(node, ".ProfileCardStats-statLink"):
    let
      count = link.getAttr("title").split(" ")[0]
      kind = link.getAttr("href").split("/")[^1]
    if kind == "followers":
      profile.followers = count
    elif kind == "following":
      profile.following = count
    else:
      profile.tweets = count
proc getIntentStats*(profile: var Profile; node: XmlNode) =
  ## Fills `profile`'s follower and following counts from the `dd.count > a`
  ## links under `node`, and sets `tweets` to "?".
  ##
  ## The last path segment of each link's `href` decides which field receives
  ## the link's inner text; other links are ignored.
  profile.tweets = "?"
  for link in querySelectorAll(node, "dd.count > a"):
    let
      count = link.innerText()
      kind = link.getAttr("href").split("/")[^1]
    if kind == "followers":
      profile.followers = count
    elif kind == "following":
      profile.following = count
proc getTweetStats*(tweet: var Tweet; node: XmlNode) =
  ## Fills `tweet`'s reply, retweet and like counts from the
  ## `.ProfileTweet-actionCountForAria` elements under `node`.
  ##
  ## All three counts start at "0". Each element's text is split on
  ## whitespace; the first word is taken as the count and the second as the
  ## stat name. Elements with fewer than two words, or with an unrecognised
  ## name, are skipped.
  tweet.replies = "0"
  tweet.retweets = "0"
  tweet.likes = "0"

  for action in node.querySelectorAll(".ProfileTweet-actionCountForAria"):
    let words = action.innerText.split()
    if words.len < 2:
      # Indexing words[1] on a shorter text would raise an index error.
      continue
    case words[1]
    of "replies": tweet.replies = words[0]
    of "likes": tweet.likes = words[0]
    of "retweets": tweet.retweets = words[0]
    else: discard
proc getTweetMedia*(tweet: var Tweet; node: XmlNode) =
  ## Collects media references for `tweet` from `node`.
  ##
  ## Every `.AdaptiveMedia-photoContainer` with a non-empty `data-image-url`
  ## is appended to `tweet.photos`. If a `.PlayableMedia-player` with a
  ## `style` attribute exists, `thumbRegex` is applied to that style to get
  ## the thumbnail: when the result contains "tweet_video", `gifRegex` is
  ## applied and the outcome stored in `tweet.gif`; otherwise the thumbnail
  ## is stored in `tweet.videoThumb`.
  for photo in node.querySelectorAll(".AdaptiveMedia-photoContainer"):
    # Use the nil/missing-safe accessor: indexing `attrs` directly raises when
    # the attribute (or the whole attribute table) is absent.
    let url = photo.getAttr("data-image-url")
    if url.len > 0:
      tweet.photos.add url

  let player = node.selectAttr(".PlayableMedia-player", "style")
  if player.len == 0:
    return

  let thumb = player.replace(thumbRegex, "$1")
  if "tweet_video" in thumb:
    tweet.gif = some(thumb.replace(gifRegex, "$1"))
  else:
    tweet.videoThumb = some(thumb)