Compare commits

...

8 Commits

Author SHA1 Message Date
Leng a6065c1203
Merge 28a20ba221 into 0fba08485b 2024-09-05 16:30:10 -07:00
sepro 0fba08485b
[ie/khanacademy] Fix extractor (#10913)
Closes #10912
Authored by: seproDev
2024-09-05 20:47:14 +02:00
Simon Sawicki b6200bdcf3
[ci] Add comment sanitization workflow (#10915)
Co-authored-by: bashonly <bashonly@protonmail.com>
Authored by: bashonly, Grub4K
2024-09-05 20:06:15 +02:00
leng.sin 28a20ba221 add 'reactor_count' into test. 2024-09-02 10:45:50 +08:00
leng.sin 6d662e56e4 Resolve comments.
1. alter code according to standard.
2024-09-02 10:41:34 +08:00
leng.sin 540bd12552 Resolve comments.
1. reuse existing fields instead of creating a new one.
2. test fields test only if fields value are dynamic.
2024-08-30 10:51:06 +08:00
leng.sin f081367164 feat: extract owner name. 2024-08-28 16:09:08 +08:00
leng.sin 078fb0f841 [extractor/facebook] Add extract total_comment_count, unified_reactors_count and share_count of the video. (yt-dlp#10824) 2024-08-27 11:07:25 +08:00
11 changed files with 73 additions and 20 deletions

View File

@ -80,5 +80,8 @@ body:
- type: markdown
attributes:
value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
### If you receive any replies asking you download a file, do NOT follow the download links!
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View File

@ -92,5 +92,8 @@ body:
- type: markdown
attributes:
value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
### If you receive any replies asking you download a file, do NOT follow the download links!
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View File

@ -88,5 +88,8 @@ body:
- type: markdown
attributes:
value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
### If you receive any replies asking you download a file, do NOT follow the download links!
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View File

@ -73,5 +73,8 @@ body:
- type: markdown
attributes:
value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
### If you receive any replies asking you download a file, do NOT follow the download links!
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View File

@ -67,5 +67,8 @@ body:
- type: markdown
attributes:
value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
### If you receive any replies asking you download a file, do NOT follow the download links!
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View File

@ -73,5 +73,8 @@ body:
- type: markdown
attributes:
value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
### If you receive any replies asking you download a file, do NOT follow the download links!
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View File

@ -1,4 +1,4 @@
name: Anti-Spam
name: Issue Lockdown
on:
issues:
types: [opened]
@ -9,6 +9,7 @@ permissions:
jobs:
lockdown:
name: Issue Lockdown
if: vars.ISSUE_LOCKDOWN
runs-on: ubuntu-latest
steps:
- name: "Lock new issue"
@ -17,4 +18,4 @@ jobs:
ISSUE_NUMBER: ${{ github.event.issue.number }}
REPOSITORY: ${{ github.repository }}
run: |
gh issue lock "${ISSUE_NUMBER}" -r too_heated -R "${REPOSITORY}"
gh issue lock "${ISSUE_NUMBER}" -R "${REPOSITORY}"

17
.github/workflows/sanitize-comment.yml vendored Normal file
View File

@ -0,0 +1,17 @@
name: Sanitize comment
on:
issue_comment:
types: [created, edited]
permissions:
issues: write
jobs:
sanitize-comment:
name: Sanitize comment
if: vars.SANITIZE_COMMENT && !github.event.issue.pull_request
runs-on: ubuntu-latest
steps:
- name: Sanitize comment
uses: yt-dlp/sanitize-comment@v1

View File

@ -49,8 +49,11 @@ VERBOSE_TMPL = '''
- type: markdown
attributes:
value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer.
### If you receive any replies asking you download a file, do NOT follow the download links!
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.
'''.strip()
NO_SKIP = '''

View File

@ -481,6 +481,9 @@ class FacebookIE(InfoExtractor):
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
post = traverse_obj(post_data, (
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
post_stats = traverse_obj(post, (
lambda _, v: v['short_form_video_context']['video']['id'] == video_id and v[
'url'] == f'https://www.facebook.com/reel/{video_id}/', 'feedback', {dict}, any)) or {}
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
title = get_first(media, ('title', 'text'))
@ -498,7 +501,8 @@ class FacebookIE(InfoExtractor):
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
or get_first(post, ('node', 'actors', ..., {dict}))
or get_first(post, ('event', 'event_creator', {dict})) or {})
or get_first(post, ('event', 'event_creator', {dict}))
or get_first(post, ('video', 'creation_story', 'short_form_video_context', 'video_owner', {dict}), default=None) or {})
uploader = uploader_data.get('name') or (
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
or self._search_regex(
@ -524,6 +528,12 @@ class FacebookIE(InfoExtractor):
webpage, 'view count', default=None)),
'concurrent_view_count': get_first(post, (
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
**traverse_obj(post_stats, {
'like_count': ('likers', 'count', {int}),
'comment_count': ('total_comment_count', {int}),
'repost_count': ('share_count_reduced', {parse_count}),
'reactor_count': ('unified_reactors', 'count', {int}),
}),
}
info_json_ld = self._search_json_ld(webpage, video_id, default={})
@ -938,12 +948,16 @@ class FacebookReelIE(InfoExtractor):
'ext': 'mp4',
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
'description': 'md5:22f03309b216ac84720183961441d8db',
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
'uploader': 'Beast Camp Training',
'uploader_id': '100040874179269',
'duration': 9.579,
'timestamp': 1637502609,
'upload_date': '20211121',
'thumbnail': r're:^https?://.*',
'like_count': int,
'comment_count': int,
'repost_count': int,
'reactor_count': int,
},
}]

View File

@ -15,7 +15,7 @@ from ..utils import (
class KhanAcademyBaseIE(InfoExtractor):
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
_PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70'
_PUBLISHED_CONTENT_VERSION = 'dc34750f0572c80f5effe7134082fe351143c1e4'
def _parse_video(self, video):
return {
@ -39,7 +39,7 @@ class KhanAcademyBaseIE(InfoExtractor):
query={
'fastly_cacheable': 'persist_until_publish',
'pcv': self._PUBLISHED_CONTENT_VERSION,
'hash': '1242644265',
'hash': '3712657851',
'variables': json.dumps({
'path': display_id,
'countryCode': 'US',