Change cookies to skip EU cookie consent page

See https://github.com/benbusby/whoogle-search/issues/311 for some context. We're now implementing the same approach as youtube-dl, see youtube_dl/extractor/youtube.py (L263-L264) at revision a726009987.
2021-05-14 18:49:25 +02:00 · 2021-05-14 18:49:25 +02:00 · 7d3b79b1cd
parent 18f5ef4c62
commit 7d3b79b1cd
3 changed files with 8 additions and 4 deletions
--- a/extractors/channel.py
+++ b/extractors/channel.py
@@ -3,7 +3,7 @@ import dateutil.parser
 import requests
 import xml.etree.ElementTree as ET
 from tools.converters import *
-from tools.extractors import extract_yt_initial_data
+from tools.extractors import extract_yt_initial_data, eu_consent_cookie
 from threading import Lock
 from cachetools import TTLCache

@@ -18,7 +18,7 @@ def extract_channel(ucid):
 			return channel_cache[ucid]

 	channel_type = "channel" if len(ucid) == 24 and ucid[:2] == "UC" else "user"
-	with requests.get("https://www.youtube.com/{}/{}/videos?hl=en".format(channel_type, ucid), cookies={"CONSENT": "PENDING+999"}) as r:
+	with requests.get("https://www.youtube.com/{}/{}/videos?hl=en".format(channel_type, ucid), cookies=eu_consent_cookie()) as r:
 		r.raise_for_status()
 		yt_initial_data = extract_yt_initial_data(r.content.decode("utf8"))

--- a/extractors/search.py
+++ b/extractors/search.py
@@ -2,7 +2,7 @@ import requests
 import traceback
 import yt_dlp
 from tools.converters import *
-from tools.extractors import extract_yt_initial_data
+from tools.extractors import extract_yt_initial_data, eu_consent_cookie
 from cachetools import TTLCache

 search_cache = TTLCache(maxsize=50, ttl=300)
@@ -17,7 +17,7 @@ ytdl = yt_dlp.YoutubeDL(ytdl_opts)

 def extract_search(q):
 	try:
-		with requests.get("https://www.youtube.com/results", params={"q": q, "hl": "en"}, cookies={"CONSENT": "PENDING+999"}) as r:
+		with requests.get("https://www.youtube.com/results", params={"q": q, "hl": "en"}, cookies=eu_consent_cookie()) as r:
 			r.raise_for_status()
 			content = r.content.decode("utf8")
 			yt_initial_data = extract_yt_initial_data(content)
--- a/tools/extractors.py
+++ b/tools/extractors.py
@@ -1,5 +1,6 @@
 import re
 import json
+import random

 r_yt_initial_data = re.compile(r"""(?:^\s*window\["ytInitialData"\]|var ytInitialData) = (\{.+?\});(?:\s*$|</script>)""", re.S + re.M)
 r_yt_initial_player_response = re.compile(r"""(?:^\s*window\["ytInitialPlayerResponse"\]|var ytInitialPlayerResponse) = (\{.+?\});(?:\s*$|</script>|var )""", re.S + re.M)
@@ -19,3 +20,6 @@ def extract_yt_initial_player_response(content):
 		return yt_initial_player_response
 	else:
 		raise Exception("Could not match ytInitialPlayerResponse in content")
+
+def eu_consent_cookie():
+	return {"CONSENT": "YES+cb.20210509-17-p0.en+F+{}".format(random.randint(100, 999))}