mirror of
https://git.sr.ht/~cadence/NewLeaf
synced 2025-02-19 15:55:44 +00:00
Compare commits
2 Commits
1ea86101fd
...
7062999921
Author | SHA1 | Date | |
---|---|---|---|
|
7062999921 | ||
|
3f57d50893 |
9
.gitignore
vendored
9
.gitignore
vendored
@ -10,3 +10,12 @@ __pycache__
|
|||||||
# Personal
|
# Personal
|
||||||
/generic-updater
|
/generic-updater
|
||||||
/configuration.py
|
/configuration.py
|
||||||
|
|
||||||
|
# Various venv names, several options to allow the user to make stuff up
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env
|
||||||
|
venv
|
||||||
|
env.bak
|
||||||
|
venv.bak
|
||||||
|
newleaf-venv
|
||||||
|
47
extractors/comments.py
Normal file
47
extractors/comments.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
import cherrypy
import json
import requests
import urllib.parse
from tools.converters import *
from tools.extractors import extract_yt_initial_data, extract_yt_cfg, eu_consent_cookie
|
||||||
|
|
||||||
|
def _convert_comment(c):
	"""Convert one commentThreadRenderer from the ajax response into the
	comment dict shape this API returns."""
	r = c["commentThreadRenderer"]["comment"]["commentRenderer"]
	published_text = "".join(run["text"] for run in r["publishedTimeText"]["runs"])
	content_text = "".join(run["text"] for run in r["contentText"]["runs"])
	return {
		"author": r["authorText"]["simpleText"],
		"authorThumbnails": list(r["authorThumbnail"]["thumbnails"]),
		"authorId": r["authorEndpoint"]["browseEndpoint"]["browseId"],
		"authorUrl": r["authorEndpoint"]["browseEndpoint"]["canonicalBaseUrl"],
		# YouTube appends " (edited)" to the published time of edited comments.
		"isEdited": " (edited)" in published_text,
		"content": content_text,
		"contentHtml": escape_html_textcontent(content_text),
		"publishedText": published_text,
		# Not yet emitted upstream — kept as reference for future work:
		# "likeCount": int(r["voteCount"]["simpleText"].replace(",", ""))
		"commentId": r["commentId"],
		"authorIsChannelOwner": r["authorIsChannelOwner"],
		# "replies": {
		# 	"replyCount": r["replyCount"]
		# }
	}

def extract_comments(id, **kwargs):
	"""Return the first page of comments for the video *id*.

	Scrapes the watch page for the comment continuation token and the
	XSRF token, then POSTs to the comment_service_ajax endpoint and
	reshapes the result.

	Returns {"videoId": ..., "comments": [...]} on success; when the
	XSRF token cannot be extracted, sets HTTP status 500 and returns an
	error dict instead. Raises requests.HTTPError if the watch page
	request fails.
	"""
	s = requests.session()
	# Ask for English so the " (edited)" string match below is reliable.
	s.headers.update({"accept-language": "en-US,en;q=0.9"})
	s.cookies.set("CONSENT", eu_consent_cookie().get("CONSENT"))
	with s.get("https://www.youtube.com/watch?v={}".format(id)) as r:
		r.raise_for_status()
		page = r.content.decode("utf8")  # decode once, reuse below
		yt_initial_data = extract_yt_initial_data(page)
		# NOTE(review): assumes the comment section is always contents[2]
		# of the watch-next results — confirm against current page markup.
		item = yt_initial_data["contents"]["twoColumnWatchNextResults"]["results"]["results"]["contents"][2]["itemSectionRenderer"]
		continuation = item["continuations"][0]["nextContinuationData"]["continuation"]
		itct = item["continuations"][0]["nextContinuationData"]["clickTrackingParams"]
		xsrf_token = extract_yt_cfg(page).get("XSRF_TOKEN", None)
	if not xsrf_token:
		# Fix: `cherrypy` was referenced here without being imported in
		# this module, so this error path raised NameError instead of
		# returning the payload below (import added at top of file).
		cherrypy.response.status = 500
		return {
			"error": "NewLeaf was unable to obtain XSRF_TOKEN from ytcfg.",
			"identifier": "XSRF_TOKEN_NOT_FOUND"
		}
	url = "https://www.youtube.com/comment_service_ajax?action_get_comments=1&pbj=1&ctoken={}&continuation={}&type=next&itct={}".format(continuation, continuation, urllib.parse.quote_plus(itct))
	with s.post(url, headers={"x-youtube-client-name": "1", "x-youtube-client-version": "2.20210422.04.00"}, data={"session_token": xsrf_token}) as rr:
		data = json.loads(rr.content.decode("utf8"))
		return {
			"videoId": id,
			"comments": [_convert_comment(c) for c in data["response"]["continuationContents"]["itemSectionContinuation"]["contents"]]
		}
|
9
index.py
9
index.py
@ -9,6 +9,7 @@ from extractors.manifest import extract_manifest
|
|||||||
from extractors.search import extract_search
|
from extractors.search import extract_search
|
||||||
from extractors.suggestions import extract_search_suggestions
|
from extractors.suggestions import extract_search_suggestions
|
||||||
from extractors.captions import extract_captions
|
from extractors.captions import extract_captions
|
||||||
|
from extractors.comments import extract_comments
|
||||||
import configuration
|
import configuration
|
||||||
|
|
||||||
@cherrypy.tools.register("before_finalize", priority=60)
|
@cherrypy.tools.register("before_finalize", priority=60)
|
||||||
@ -26,7 +27,8 @@ class NewLeaf(object):
|
|||||||
["channels", 1, 2],
|
["channels", 1, 2],
|
||||||
["videos", 1, 1],
|
["videos", 1, 1],
|
||||||
["search", 0, 1],
|
["search", 0, 1],
|
||||||
["captions", 1, 1]
|
["captions", 1, 1],
|
||||||
|
["comments", 1, 1]
|
||||||
]
|
]
|
||||||
for e in endpoints:
|
for e in endpoints:
|
||||||
if vpath[2] == e[0] and len(vpath) >= e[1]+3 and len(vpath) <= e[2]+3:
|
if vpath[2] == e[0] and len(vpath) >= e[1]+3 and len(vpath) <= e[2]+3:
|
||||||
@ -114,6 +116,11 @@ class NewLeaf(object):
|
|||||||
"identifier": "NO_MATCHING_CAPTIONS"
|
"identifier": "NO_MATCHING_CAPTIONS"
|
||||||
}), "utf8")
|
}), "utf8")
|
||||||
|
|
||||||
|
@cherrypy.expose
@cherrypy.tools.json_out()
def comments(self, id, **kwargs):
	"""CherryPy handler: serve the first page of comments for video *id*
	as JSON. Extra query parameters are accepted but ignored."""
	return extract_comments(id)
|
||||||
|
|
||||||
@cherrypy.expose
|
@cherrypy.expose
|
||||||
def vi(self, id, file):
|
def vi(self, id, file):
|
||||||
with requests.get("https://i.ytimg.com/vi/{}/{}".format(id, file)) as r:
|
with requests.get("https://i.ytimg.com/vi/{}/{}".format(id, file)) as r:
|
||||||
|
@ -4,6 +4,7 @@ import random
|
|||||||
|
|
||||||
# Pre-compiled patterns for JSON blobs embedded in YouTube page HTML.
# Each captures the raw JSON object in group 1; re.S + re.M let the
# blob span lines while ^/$ still anchor per line.
r_yt_initial_data = re.compile(r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|</script>)""", re.S + re.M)
r_yt_initial_player_response = re.compile(r"""(?:^\s*window\["ytInitialPlayerResponse"\]|var ytInitialPlayerResponse) = (\{.+?\});(?:\s*$|</script>|var )""", re.S + re.M)
# Matches the ytcfg.set({...}) call; group 1 is the config object
# (this is where extract_comments reads XSRF_TOKEN from).
r_yt_cfg = re.compile(r"""ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;""")
||||||
|
|
||||||
def extract_yt_initial_data(content):
|
def extract_yt_initial_data(content):
|
||||||
m_yt_initial_data = re.search(r_yt_initial_data, content)
|
m_yt_initial_data = re.search(r_yt_initial_data, content)
|
||||||
@ -21,5 +22,11 @@ def extract_yt_initial_player_response(content):
|
|||||||
else:
|
else:
|
||||||
raise Exception("Could not match ytInitialPlayerResponse in content")
|
raise Exception("Could not match ytInitialPlayerResponse in content")
|
||||||
|
|
||||||
|
def extract_yt_cfg(content):
	"""Pull the ytcfg.set({...}) config object out of page HTML.

	Returns the parsed config as a dict; raises when no ytcfg call
	can be found in *content*.
	"""
	match = r_yt_cfg.search(content)
	if not match:
		raise Exception("Could not match ytcfg in content")
	return json.loads(match.group(1))
|
||||||
|
|
||||||
def eu_consent_cookie():
	"""Build a CONSENT cookie dict that satisfies the EU consent wall.

	The trailing number is randomised each call, mimicking the format
	YouTube itself issues.
	"""
	bucket = random.randint(100, 999)
	return {"CONSENT": "YES+cb.20210509-17-p0.en+F+{}".format(bucket)}
|
||||||
|
Loading…
Reference in New Issue
Block a user