mirror of https://git.sr.ht/~cadence/NewLeaf
Change cookies to skip EU cookie consent page
See https://github.com/benbusby/whoogle-search/issues/311 for some
context.
We're now implementing the same CONSENT cookie approach as youtube-dl; see
youtube_dl/extractor/youtube.py at commit a726009987 (L263-L264).
This commit is contained in:
parent
18f5ef4c62
commit
7d3b79b1cd
|
@ -3,7 +3,7 @@ import dateutil.parser
|
||||||
import requests
|
import requests
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from tools.converters import *
|
from tools.converters import *
|
||||||
from tools.extractors import extract_yt_initial_data
|
from tools.extractors import extract_yt_initial_data, eu_consent_cookie
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
from cachetools import TTLCache
|
from cachetools import TTLCache
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ def extract_channel(ucid):
|
||||||
return channel_cache[ucid]
|
return channel_cache[ucid]
|
||||||
|
|
||||||
channel_type = "channel" if len(ucid) == 24 and ucid[:2] == "UC" else "user"
|
channel_type = "channel" if len(ucid) == 24 and ucid[:2] == "UC" else "user"
|
||||||
with requests.get("https://www.youtube.com/{}/{}/videos?hl=en".format(channel_type, ucid), cookies={"CONSENT": "PENDING+999"}) as r:
|
with requests.get("https://www.youtube.com/{}/{}/videos?hl=en".format(channel_type, ucid), cookies=eu_consent_cookie()) as r:
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
yt_initial_data = extract_yt_initial_data(r.content.decode("utf8"))
|
yt_initial_data = extract_yt_initial_data(r.content.decode("utf8"))
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ import requests
|
||||||
import traceback
|
import traceback
|
||||||
import yt_dlp
|
import yt_dlp
|
||||||
from tools.converters import *
|
from tools.converters import *
|
||||||
from tools.extractors import extract_yt_initial_data
|
from tools.extractors import extract_yt_initial_data, eu_consent_cookie
|
||||||
from cachetools import TTLCache
|
from cachetools import TTLCache
|
||||||
|
|
||||||
search_cache = TTLCache(maxsize=50, ttl=300)
|
search_cache = TTLCache(maxsize=50, ttl=300)
|
||||||
|
@ -17,7 +17,7 @@ ytdl = yt_dlp.YoutubeDL(ytdl_opts)
|
||||||
|
|
||||||
def extract_search(q):
|
def extract_search(q):
|
||||||
try:
|
try:
|
||||||
with requests.get("https://www.youtube.com/results", params={"q": q, "hl": "en"}, cookies={"CONSENT": "PENDING+999"}) as r:
|
with requests.get("https://www.youtube.com/results", params={"q": q, "hl": "en"}, cookies=eu_consent_cookie()) as r:
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
content = r.content.decode("utf8")
|
content = r.content.decode("utf8")
|
||||||
yt_initial_data = extract_yt_initial_data(content)
|
yt_initial_data = extract_yt_initial_data(content)
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import random
|
||||||
|
|
||||||
r_yt_initial_data = re.compile(r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|</script>)""", re.S + re.M)
|
r_yt_initial_data = re.compile(r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|</script>)""", re.S + re.M)
|
||||||
r_yt_initial_player_response = re.compile(r"""(?:^\s*window\["ytInitialPlayerResponse"\]|var ytInitialPlayerResponse) = (\{.+?\});(?:\s*$|</script>|var )""", re.S + re.M)
|
r_yt_initial_player_response = re.compile(r"""(?:^\s*window\["ytInitialPlayerResponse"\]|var ytInitialPlayerResponse) = (\{.+?\});(?:\s*$|</script>|var )""", re.S + re.M)
|
||||||
|
@ -19,3 +20,6 @@ def extract_yt_initial_player_response(content):
|
||||||
return yt_initial_player_response
|
return yt_initial_player_response
|
||||||
else:
|
else:
|
||||||
raise Exception("Could not match ytInitialPlayerResponse in content")
|
raise Exception("Could not match ytInitialPlayerResponse in content")
|
||||||
|
|
||||||
|
def eu_consent_cookie():
    """Build the cookie dict used to skip the EU cookie consent page.

    Returns:
        dict: A single ``CONSENT`` entry whose value is the fixed prefix
        ``YES+cb.20210509-17-p0.en+F+`` followed by a random integer
        between 100 and 999 inclusive. A new number is drawn on each call.
    """
    # The numeric suffix is not a secret, so the plain `random` module is
    # sufficient here.
    return {"CONSENT": "YES+cb.20210509-17-p0.en+F+{}".format(random.randint(100, 999))}
|
||||||
|
|
Loading…
Reference in New Issue