2023-08-29 23:45:18 +02:00
#SPDX-License-Identifier: AGPL-3.0-only
2023-11-17 22:54:04 +01:00
import std / [ httpclient , asyncdispatch , times , json , random , sequtils , strutils , tables , packedsets , os , uri ]
2023-11-14 23:51:08 +01:00
import nimcrypto
2023-11-17 23:09:45 +01:00
import types
2023-09-18 20:24:23 +02:00
import experimental / parser / guestaccount
2020-06-01 02:16:24 +02:00
2023-08-19 00:25:14 +02:00
# max requests at a time per account to avoid race conditions
2023-08-20 11:56:42 +02:00
const
  # threshold that `isReady` compares against an account's `pending` count
  maxConcurrentReqs = 2

  # number of seconds in one day
  dayInSeconds = 24 * 60 * 60

  # per-endpoint request budget; `getAccountPoolHealth` subtracts an
  # account's remaining count from it to get the number of requests made
  apiMaxReqs: Table[Api, int] = toTable({
    Api.search: 50,
    Api.tweetDetail: 150,
    Api.photoRail: 180,
    Api.userTweets: 500,
    Api.userTweetsAndReplies: 500,
    Api.userMedia: 500,
    Api.userRestId: 500,
    Api.userScreenName: 500,
    Api.tweetResult: 500,
    Api.list: 500,
    Api.listTweets: 500,
    Api.listMembers: 500,
    Api.listBySlug: 500
  })
2021-01-18 07:47:51 +01:00
2020-07-09 09:18:14 +02:00
var
  # guest accounts currently held by this module
  accountPool: seq[GuestAccount] = @[]
  # gates the output of `log`; assigned in `initAccountPool`
  enableLogging: bool = false
2023-08-30 03:04:22 +02:00
template log(str: varargs[string, `$`]) =
  ## Echoes the stringified arguments on one line behind the accounts
  ## prefix, but only while `enableLogging` is true.
  if enableLogging:
    echo " [accounts] ", join(str, " ")
2021-01-13 14:32:26 +01:00
2023-11-01 06:09:44 +01:00
proc snowflakeToEpoch(flake: int64): int64 =
  ## Extracts the timestamp from an id: drops the low 22 bits, adds the
  ## fixed millisecond offset and truncates the sum to whole seconds.
  const epochOffsetMs = 1288834974657'i64
  let millis = (flake shr 22) + epochOffsetMs
  result = millis div 1000
proc hasExpired(account: GuestAccount): bool =
  ## True when the account, dated by the timestamp embedded in its id,
  ## is more than 30 whole days old.
  const maxAgeDays = 30
  let
    createdAt = snowflakeToEpoch(account.id)
    ageSeconds = epochTime().int64 - createdAt
  result = int(ageSeconds) div dayInSeconds > maxAgeDays
2023-10-31 13:04:32 +01:00
proc getAccountPoolHealth*(): JsonNode =
  ## Summarises the pool as JSON: number of accounts, how many have at
  ## least one limited endpoint, the oldest / newest / average creation
  ## time (derived from the account ids), and the requests made in the
  ## rate-limit windows that are still open, in total and per endpoint.
  let now = epochTime().int

  var
    requestCount = 0
    limitedIds: PackedSet[int64]
    perApi: Table[string, int]
    oldest = now.int64
    newest = 0'i64
    createdSum = 0'i64

  for account in accountPool:
    let created = snowflakeToEpoch(account.id)
    newest = max(newest, created)
    oldest = min(oldest, created)
    createdSum += created

    for api, status in account.apis:
      let used = apiMaxReqs[api] - status.remaining

      if status.limited:
        limitedIds.incl account.id

      # a window whose reset time has passed means no requests were made
      # with this account and endpoint since the limit reset
      if status.reset >= now:
        perApi.mgetOrPut($api, 0).inc used
        requestCount.inc used

  var average = 0'i64
  if accountPool.len > 0:
    average = createdSum div accountPool.len
  else:
    # nothing to report on: show the epoch start instead of "now"
    oldest = 0

  # NOTE(review): keys are kept exactly as found, padding inside the
  # quotes included -- confirm against consumers of this JSON
  result = %*{
    " accounts ": {
      " total ": accountPool.len,
      " limited ": limitedIds.card,
      " oldest ": $fromUnix(oldest),
      " newest ": $fromUnix(newest),
      " average ": $fromUnix(average)
    },
    " requests ": {
      " total ": requestCount,
      " apis ": perApi
    }
  }
proc getAccountPoolDebug*(): JsonNode =
  ## Dumps the pool as a JSON object keyed by account id. Each entry holds
  ## the account's pending count and, per endpoint, the remaining requests
  ## (only while the reset time is still ahead) and a limited marker.
  ## Endpoints with neither piece of information are left out.
  let
    now = epochTime().int
    dump = newJObject()

  for account in accountPool:
    # NOTE(review): keys are kept exactly as found, padding inside the
    # quotes included -- confirm against consumers of this JSON
    let entry = %*{
      " apis ": newJObject(),
      " pending ": account.pending,
    }

    for api, status in account.apis:
      let windowOpen = status.reset > now

      # nothing worth showing for this endpoint
      if not windowOpen and not status.limited:
        continue

      let info = newJObject()
      if windowOpen:
        info[" remaining "] = %status.remaining
      if status.limited:
        info[" limited "] = %true

      entry{" apis ", $api} = info

    dump[$account.id] = entry

  result = %dump
2021-01-13 14:32:26 +01:00
proc rateLimitError*(): ref RateLimitError =
  ## Creates the exception object for a rate-limit failure; raising it is
  ## left to the caller.
  result = newException(RateLimitError, " rate limited ")
2020-06-01 02:16:24 +02:00
2023-08-19 00:25:14 +02:00
proc isLimited(account: GuestAccount; api: Api): bool =
  ## Reports whether `account` must not be used for `api` right now.
  ##
  ## A nil account counts as limited; an account with no record for `api`
  ## does not. Otherwise the account is limited while its `limited` flag
  ## is set, or while 10 or fewer requests remain in a window whose reset
  ## time is still ahead. A `limited` flag older than one day is cleared
  ## here (and logged).
  if account.isNil:
    return true
  if api notin account.apis:
    return false

  let
    limit = account.apis[api]
    now = epochTime().int

  # Track the flag separately: `limit` is a snapshot taken before the
  # reset below, so reading `limit.limited` afterwards would still report
  # the stale value for the very call that clears it.
  var stillLimited = limit.limited

  if stillLimited and (now - limit.limitedAt) > dayInSeconds:
    account.apis[api].limited = false
    stillLimited = false
    log " resetting limit, api: ", api, " , id: ", account.id

  return stillLimited or (limit.remaining <= 10 and limit.reset > now)
2020-06-01 02:16:24 +02:00
2023-08-19 00:25:14 +02:00
proc isReady(account: GuestAccount; api: Api): bool =
  ## True when `account` exists, has room for another in-flight request
  ## and is not limited for `api`.
  ##
  ## This check runs before the caller increments `pending`, so the
  ## comparison has to be `>=`: with `>` an account already holding
  ## `maxConcurrentReqs` requests would be handed out once more.
  not (account.isNil or account.pending >= maxConcurrentReqs or
       account.isLimited(api))
2022-01-05 23:38:46 +01:00
2023-08-30 03:04:22 +02:00
proc invalidate*(account: var GuestAccount) =
  ## Removes `account` from the pool (when it is found there) and sets the
  ## caller's variable to nil. Does nothing for a nil account.
  if account.isNil:
    return

  log " invalidating expired account: ", account.id

  # TODO: This isn't sufficient, but it works for now
  let pos = accountPool.find(account)
  if pos >= 0:
    accountPool.delete(pos)

  account = nil
2023-08-31 01:29:54 +02:00
proc release*(account: GuestAccount) =
  ## Decrements the account's `pending` counter; a nil account is ignored.
  if not account.isNil:
    dec account.pending
2020-06-01 02:16:24 +02:00
2023-08-19 00:25:14 +02:00
proc getGuestAccount*(api: Api): Future[GuestAccount] {.async.} =
  ## Picks a random account that is ready for `api` and increments its
  ## `pending` counter before returning it. Draws at most as many samples
  ## as there are accounts in the pool.
  ##
  ## Raises the error built by `rateLimitError` when no ready account was
  ## found.
  for _ in 1 .. accountPool.len:
    if result.isReady(api):
      break
    result = accountPool.sample()

  if result.isNil or not result.isReady(api):
    log " no accounts available for API: ", api
    raise rateLimitError()

  inc result.pending
2023-08-30 03:04:22 +02:00
proc setLimited*(account: GuestAccount; api: Api) =
  ## Flags the account's existing record for `api` as limited, stamps it
  ## with the current time and logs the event.
  let stamp = epochTime().int
  account.apis[api].limited = true
  account.apis[api].limitedAt = stamp

  log(" rate limited, api: ", api,
      " , reqs left: ", account.apis[api].remaining,
      " , id: ", account.id)
2023-08-19 00:25:14 +02:00
proc setRateLimit*(account: GuestAccount; api: Api; remaining, reset: int) =
  ## Records the rate-limit state reported for `api`.
  ##
  ## When a record already exists:
  ## * a report with more requests remaining than recorded, for the same
  ##   or an earlier reset time, is ignored;
  ## * a report for the same reset time with no more requests remaining
  ##   than recorded only lowers the stored `remaining`.
  ## Every other case stores a fresh `RateLimit` for the endpoint.
  # avoid undefined behavior in race conditions
  if api in account.apis:
    let known = account.apis[api]

    if known.remaining < remaining:
      if known.reset >= reset:
        return
    elif known.reset == reset:
      account.apis[api].remaining = remaining
      return

  account.apis[api] = RateLimit(remaining: remaining, reset: reset)
2020-06-01 02:16:24 +02:00
2023-09-18 20:24:23 +02:00
proc initAccountPool*(cfg: Config; path: string) =
  ## Fills the account pool from disk and turns logging on or off
  ## according to `cfg.enableDebug`.
  ##
  ## A `path` ending in `.json` is first tried as its `.jsonl` sibling,
  ## read one account per line; otherwise `path` itself is read that way.
  ## If that file is missing, `path` is parsed as a whole by
  ## `parseGuestAccounts`. If neither exists and
  ## `cfg.guestAccountsUsePool` is off, an error is printed and the
  ## program quits with status 1. Expired accounts are dropped at the end.
  enableLogging = cfg.enableDebug

  let jsonlPath = if path.endsWith(".json"): (path & 'l') else: path

  if fileExists(jsonlPath):
    log " Parsing JSONL guest accounts file: ", jsonlPath
    for line in jsonlPath.lines:
      # a blank line carries no account; handing it to the parser would
      # fail the whole startup
      if line.isEmptyOrWhitespace:
        continue
      accountPool.add parseGuestAccount(line)
  elif fileExists(path):
    log " Parsing JSON guest accounts file: ", path
    accountPool = parseGuestAccounts(path)
  elif not cfg.guestAccountsUsePool:
    echo " [accounts] ERROR: ", path, " not found. This file is required to authenticate API requests. Alternatively, configure the guest account pool in nitter.conf "
    quit 1

  accountPool.keepItIf(not it.hasExpired)
  log " Successfully added ", accountPool.len, " valid accounts. "
2023-11-14 23:51:08 +01:00
2023-11-17 20:07:14 +01:00
proc updateAccountPool*(cfg: Config) {.async.} =
  ## Background loop that refills the pool from the configured accounts
  ## service. Returns immediately when `cfg.guestAccountsUsePool` is off.
  ##
  ## After an initial 10 second delay it runs once an hour: when the pool
  ## is empty it requests accounts from `cfg.guestAccountsPoolUrl` (one
  ## account per response line), then drops expired accounts. Failures
  ## while fetching or parsing are logged and the loop carries on.
  if not cfg.guestAccountsUsePool:
    return

  # wait for a few seconds before fetching guest accounts, so that
  # /.well-known/... is served correctly
  await sleepAsync(10 * 1000)

  while true:
    if accountPool.len == 0:
      log " fetching more accounts from service "

      let client = newAsyncHttpClient()
      try:
        # NOTE(review): the query keys are kept exactly as found, padding
        # inside the quotes included -- confirm what the service expects
        let url = cfg.guestAccountsPoolUrl ? {" id ": cfg.guestAccountsPoolId, " auth ": cfg.guestAccountsPoolAuth}
        let resp = await client.get($url)
        let guestAccounts = await resp.body
        log " status code from service: ", resp.status

        for line in guestAccounts.splitLines:
          # skip blank lines (splitLines yields one after a trailing
          # newline); parsing them would abort the import via the handler
          if not line.isEmptyOrWhitespace:
            accountPool.add parseGuestAccount(line)
      except Exception as e:
        log " failed to fetch from accounts service: ", e.msg
      finally:
        client.close()

    accountPool.keepItIf(not it.hasExpired)

    await sleepAsync(3600 * 1000)
2023-11-17 20:07:14 +01:00
2023-11-14 23:51:08 +01:00
proc getAuthHash*(cfg: Config): string =
  ## Returns the lowercase SHA-256 digest string of
  ## `cfg.guestAccountsPoolAuth`. When that setting is empty, logs a
  ## warning and returns the placeholder string instead of a hash.
  if cfg.guestAccountsPoolAuth.len > 0:
    return toLowerAscii($sha_256.digest(cfg.guestAccountsPoolAuth))

  # If somebody turns on pool auth and provides a dummy key, we should
  # prevent third parties from using that mis-configured auth and impersonate
  # this instance
  log " poolAuth is empty, authentication with accounts service will fail "
  # NOTE(review): the returned literal is kept exactly as found (a single
  # space) -- confirm callers do not expect an empty string here
  result = " "