diff --git a/youtube_transcript_api/__init__.py b/youtube_transcript_api/__init__.py index 7f703f4..a9c90b8 100644 --- a/youtube_transcript_api/__init__.py +++ b/youtube_transcript_api/__init__.py @@ -14,4 +14,6 @@ FailedToCreateConsentCookie, YouTubeRequestFailed, InvalidVideoId, + VideoUnplayable, + LoginRequired ) diff --git a/youtube_transcript_api/_errors.py b/youtube_transcript_api/_errors.py index d652c59..c00c811 100644 --- a/youtube_transcript_api/_errors.py +++ b/youtube_transcript_api/_errors.py @@ -8,6 +8,7 @@ class CouldNotRetrieveTranscript(Exception): ERROR_MESSAGE = '\nCould not retrieve a transcript for the video {video_url}!' CAUSE_MESSAGE_INTRO = ' This is most likely caused by:\n\n{cause}' CAUSE_MESSAGE = '' + REASON_MESSAGE = '{cause}: {reason}\n{subreason}' GITHUB_REFERRAL = ( '\n\nIf you are sure that the described cause is not responsible for this error ' 'and that a transcript should be retrievable, please create an issue at ' @@ -17,7 +18,8 @@ class CouldNotRetrieveTranscript(Exception): 'Also make sure that there are no open issues which already describe your problem!' ) - def __init__(self, video_id): + def __init__(self, video_id, playability=None): + self.playability = playability self.video_id = video_id super(CouldNotRetrieveTranscript, self).__init__(self._build_error_message()) @@ -32,6 +34,14 @@ def _build_error_message(self): @property def cause(self): + if self.playability: + # if self.playability IS NOT None, use the playability error reason the API presented. + + subreason = get_playability_subreason(self.playability) + return self.REASON_MESSAGE.format( + cause=self.CAUSE_MESSAGE, + reason=self.playability.get("reason"), + subreason=subreason) return self.CAUSE_MESSAGE @@ -100,6 +110,12 @@ class CookiesInvalid(CouldNotRetrieveTranscript): class FailedToCreateConsentCookie(CouldNotRetrieveTranscript): CAUSE_MESSAGE = 'Failed to automatically give consent to saving cookies' +class VideoUnplayable(CouldNotRetrieveTranscript): + CAUSE_MESSAGE = 'Unplayable video' + +class LoginRequired(CouldNotRetrieveTranscript): + CAUSE_MESSAGE = 'Login required' + class NoTranscriptFound(CouldNotRetrieveTranscript): CAUSE_MESSAGE = ( @@ -118,3 +134,38 @@ def cause(self): requested_language_codes=self._requested_language_codes, transcript_data=str(self._transcript_data), ) + + +def get_playability_error(playability_json): + """ + Using the json extracted from playabilityStatus, + returns a custom error based on the value of the "status" key. + + Anything that is not {"status": "OK"} is likely an error. + """ + reason = playability_json.get("status") + if reason == 'LOGIN_REQUIRED': + # error for age related playability + return LoginRequired + elif reason == 'UNPLAYABLE': + # error for region/country lock playability + return VideoUnplayable + else: + # error fallback + return TranscriptsDisabled + + +def get_playability_subreason(playability_json): + """ + Traverses playability json nested struct to pick out the subreason, if any. + """ + + # check for each nested keys and fail fast if they dont exist. + error_screen = playability_json.get("errorScreen", {}) + renderer = error_screen.get("playerErrorMessageRenderer", {}) + subreason = renderer.get("subreason", {}).get("runs", []) + + if subreason: + return subreason[0].get('text', '') + + return "" \ No newline at end of file diff --git a/youtube_transcript_api/_transcripts.py b/youtube_transcript_api/_transcripts.py index 3d9a5e7..bf8d8ed 100644 --- a/youtube_transcript_api/_transcripts.py +++ b/youtube_transcript_api/_transcripts.py @@ -25,6 +25,9 @@ NoTranscriptAvailable, FailedToCreateConsentCookie, InvalidVideoId, + VideoUnplayable, + LoginRequired, + get_playability_error ) from ._settings import WATCH_URL @@ -50,7 +53,6 @@ def fetch(self, video_id): def _extract_captions_json(self, html, video_id): splitted_html = html.split('"captions":') - if len(splitted_html) <= 1: if video_id.startswith('http://') or video_id.startswith('https://'): raise InvalidVideoId(video_id) @@ -58,9 +60,26 @@ def _extract_captions_json(self, html, video_id): raise TooManyRequests(video_id) if '"playabilityStatus":' not in html: raise VideoUnavailable(video_id) - - raise TranscriptsDisabled(video_id) - + + # attempt to parse the playability reason from the html. + playability_splitted_html = html.split('"playabilityStatus":') + if len(playability_splitted_html) <= 1: + # if we didnt find "playabilityStatus" to split on, fallback. + raise TranscriptsDisabled(video_id) + + # if we cannot split on videoDetails (a key after "playabilityStatus") + raw_details = playability_splitted_html[1].split(',"videoDetails') + if len(raw_details) <= 1: + raise TranscriptsDisabled(video_id) + + playability_status_json = json.loads( + raw_details[0].replace('\n', '') + ) + + playability_error = get_playability_error(playability_status_json) + raise playability_error(video_id, playability_status_json) + + # we were able to split on "captions": captions_json = json.loads( splitted_html[1].split(',"videoDetails')[0].replace('\n', '') ).get('playerCaptionsTracklistRenderer') diff --git a/youtube_transcript_api/test/assets/youtube_transcripts_disabled.html.static b/youtube_transcript_api/test/assets/youtube_transcripts_disabled.html.static index 5bda10b..680abe0 100644 --- a/youtube_transcript_api/test/assets/youtube_transcripts_disabled.html.static +++ b/youtube_transcript_api/test/assets/youtube_transcripts_disabled.html.static @@ -503,9 +503,7 @@ Wird geladen... - +
diff --git a/youtube_transcript_api/test/assets/youtube_video_login_required.html.static b/youtube_transcript_api/test/assets/youtube_video_login_required.html.static new file mode 100644 index 0000000..2ba841e --- /dev/null +++ b/youtube_transcript_api/test/assets/youtube_video_login_required.html.static @@ -0,0 +1,787 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 《機動戰士鋼彈 水星的魔女 Season2》第23話 (繁中字幕 | 日語原聲)【Ani-One】 - YouTube + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+
+
+ + +
+
+ +
+
+
+
+
+
+ +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + + + + + + + + + + +
+
+
+ + + + +
+
+
+
+
+
+
AboutPressCopyrightContact usCreatorsAdvertiseDevelopersTermsPrivacyPolicy & SafetyHow YouTube worksTest new featuresNFL Sunday Ticket + +
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/youtube_transcript_api/test/assets/youtube_video_unplayable.html.static b/youtube_transcript_api/test/assets/youtube_video_unplayable.html.static new file mode 100644 index 0000000..3a8f891 --- /dev/null +++ b/youtube_transcript_api/test/assets/youtube_video_unplayable.html.static @@ -0,0 +1,787 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 《機動戰士鋼彈 水星的魔女 Season2》第23話 (繁中字幕 | 日語原聲)【Ani-One】 - YouTube + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+
+
+
+
+
+ + +
+
+ +
+
+
+
+
+
+ +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + + + + + + + + + + +
+
+
+ + + + +
+
+
+
+
+
+
AboutPressCopyrightContact usCreatorsAdvertiseDevelopersTermsPrivacyPolicy & SafetyHow YouTube worksTest new featuresNFL Sunday Ticket + +
+ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/youtube_transcript_api/test/test_api.py b/youtube_transcript_api/test/test_api.py index d6f5e0c..e66a403 100644 --- a/youtube_transcript_api/test/test_api.py +++ b/youtube_transcript_api/test/test_api.py @@ -1,3 +1,4 @@ +import os from unittest import TestCase from mock import patch @@ -21,6 +22,8 @@ FailedToCreateConsentCookie, YouTubeRequestFailed, InvalidVideoId, + VideoUnplayable, + LoginRequired ) @@ -198,6 +201,26 @@ def test_get_transcript__exception_if_video_unavailable(self): with self.assertRaises(VideoUnavailable): YouTubeTranscriptApi.get_transcript('abc') + def test_get_transcript__exception_if_video_unplayable(self): + httpretty.register_uri( + httpretty.GET, + 'https://www.youtube.com/watch', + body=load_asset('youtube_video_unplayable.html.static') + ) + + with self.assertRaises(VideoUnplayable): + YouTubeTranscriptApi.get_transcript('kZsVStYdmws') + + def test_get_transcript__exception_if_login_required(self): + httpretty.register_uri( + httpretty.GET, + 'https://www.youtube.com/watch', + body=load_asset('youtube_video_login_required.html.static') + ) + + with self.assertRaises(LoginRequired): + YouTubeTranscriptApi.get_transcript('4FN12sqoC4Y') + def test_get_transcript__exception_if_youtube_request_fails(self): httpretty.register_uri( httpretty.GET,