Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Client Error: Too Many Requests for url #331

Open
MohamedFakhry2007 opened this issue Sep 14, 2024 · 1 comment
Open

Client Error: Too Many Requests for url #331

MohamedFakhry2007 opened this issue Sep 14, 2024 · 1 comment

Comments

@MohamedFakhry2007
Copy link

MohamedFakhry2007 commented Sep 14, 2024

What code / cli command are you executing?

For example: I am running this code:

import os
from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi
import logging
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import random

# Configure root-level logging: timestamped INFO messages.
logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)
logger = logging.getLogger()

# YouTube Data API v3 key (placeholder -- substitute a real key before running).
API_KEY = 'API_KEY'

def get_video_ids_and_titles(playlist_id, max_results=250):
    """Collect up to *max_results* video ids and titles from a playlist.

    Pages through the YouTube Data API ``playlistItems`` endpoint, sleeping
    10 seconds between pages to stay under the API's rate limits.

    Args:
        playlist_id: ID of the YouTube playlist to enumerate.
        max_results: Upper bound on the number of videos returned.

    Returns:
        A list of ``{'id': ..., 'title': ...}`` dicts, at most
        *max_results* entries long.
    """
    youtube = build('youtube', 'v3', developerKey=API_KEY)

    collected = []
    page_token = None

    while True:
        # The API caps page size at 50; never ask for more than we still need.
        page = youtube.playlistItems().list(
            part='contentDetails,snippet',
            playlistId=playlist_id,
            maxResults=min(max_results - len(collected), 50),
            pageToken=page_token
        ).execute()

        collected.extend(
            {'id': item['contentDetails']['videoId'],
             'title': item['snippet']['title']}
            for item in page['items']
        )

        page_token = page.get('nextPageToken')
        if not page_token or len(collected) >= max_results:
            break

        # Pause 10 seconds before requesting the next page (rate-limit guard).
        time.sleep(10)

    return collected

def extract_subtitles(video, output_dir, max_retries=10):
    """Download the Arabic auto-generated transcript for one video to a .txt file.

    Retries with exponential backoff plus up to 1s of random jitter on any
    fetch error -- this is how transient HTTP 429 "Too Many Requests"
    responses are absorbed.

    Args:
        video: Dict with 'id' (YouTube video id) and 'title' keys.
        output_dir: Directory the transcript file is written into.
        max_retries: Number of fetch attempts before giving up.
    """
    # Read these BEFORE the retry loop. The original code assigned them inside
    # the `try`, so a missing dict key would have made the `except` handler
    # (and the final error log) raise NameError on the unbound `video_id`.
    video_id = video['id']
    video_title = video['title']

    retry_count = 0
    while retry_count < max_retries:
        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

            # Look specifically for the auto-generated Arabic track.
            arabic_auto_transcript = None
            for transcript in transcript_list:
                if transcript.language_code == 'ar' and transcript.is_generated:
                    arabic_auto_transcript = transcript
                    break

            if arabic_auto_transcript is None:
                # Not worth retrying -- the track simply does not exist.
                logger.warning(f"No Arabic auto-generated subtitles found for video {video_id}")
                return

            transcript = arabic_auto_transcript.fetch()

            # Keep only letters, digits, and spaces so the title is a safe filename.
            safe_title = "".join([c for c in video_title if c.isalpha() or c.isdigit() or c==' ']).rstrip()
            filename = os.path.join(output_dir, f"{safe_title}_ar_auto.txt")

            with open(filename, 'w', encoding='utf-8') as f:
                for entry in transcript:
                    f.write(f"{entry['text']}\n")

            logger.info(f"Arabic auto-generated subtitles extracted for video: {video_title}")
            return
        except Exception as e:
            # Exponential backoff: 2, 4, 8, ... seconds, plus 0-1s jitter.
            retry_count += 1
            wait_time = (2 ** retry_count) + (random.randint(0, 1000) / 1000)
            logger.warning(f"Error extracting subtitles for video {video_id}. Retrying in {wait_time:.2f} seconds. Error: {str(e)}")
            time.sleep(wait_time)

    logger.error(f"Failed to extract subtitles for video {video_id} after {max_retries} attempts")

def main():
    """Fetch Arabic auto-generated transcripts for every upload of a channel.

    Resolves the channel's "uploads" playlist, lists its videos, then fetches
    each transcript sequentially with generous delays to avoid rate limiting.
    """
    channel_id = "UCGnCvNgWZ3T7hJJajjGYucA"
    output_dir = "Kwili"
    max_videos = 250  # Set this to the number of videos you want to process

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logger.info("Starting Arabic auto-generated subtitle extraction process")

    # Single handler for the whole pipeline: the original code nested two
    # try/except blocks with byte-identical handlers, so merging them
    # preserves the logged behavior exactly.
    try:
        # Resolve the channel's "uploads" playlist, which lists every video.
        youtube = build('youtube', 'v3', developerKey=API_KEY)
        request = youtube.channels().list(
            part='contentDetails',
            id=channel_id
        )
        response = request.execute()

        logger.info(f"API Response: {json.dumps(response, indent=2)}")

        if 'items' not in response or not response['items']:
            logger.error(f"No channel found for ID: {channel_id}")
            return

        playlist_id = response['items'][0]['contentDetails']['relatedPlaylists']['uploads']

        logger.info(f"Found uploads playlist ID: {playlist_id}")

        # Pause 10 seconds before the next API request (rate-limit guard).
        time.sleep(10)

        videos = get_video_ids_and_titles(playlist_id, max_videos)
        logger.info(f"Retrieved {len(videos)} videos")

        # One worker on purpose: parallel transcript fetches trigger HTTP 429.
        with ThreadPoolExecutor(max_workers=1) as executor:
            future_to_video = {executor.submit(extract_subtitles, video, output_dir): video
                               for video in videos}
            for future in as_completed(future_to_video):
                video = future_to_video[future]
                try:
                    future.result()
                except Exception as exc:
                    logger.error(f"Video {video['id']} generated an exception: {exc}")

                # Generous delay between videos to stay under rate limits.
                time.sleep(30)

        logger.info("Arabic auto-generated subtitle extraction process completed")

    except Exception as e:
        logger.error(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()

### Which Python version are you using?
Python 3.10.12

### Which version of youtube-transcript-api are you using?
Version: 0.6.2

# Expected behavior
Describe what you expected to happen. 
I expected to get the Arabic auto-generated transcripts for each video in the channel's uploads playlist and export them to .txt files. I have set delays between each call, but if more time between requests is required, please let me know whether that would fix the issue.

# Actual behavior

WARNING:root:Error extracting subtitles for video R8mtJd4wUFI. Retrying in 2.84 seconds. Error:
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=429 Client Error: Too Many Requests for url: https://www.google.com/sorry/index?continue=https://www.youtube.com/watch%3Fv%3DR8mtJd4wUFI&q=EgQiqF_7GPe6lbcGIjAPLKF3hlhkSNN1i4W9XS6xKPjE-w0Ks_uSLMc6FDZ-vX9W-tCj2esBEOlM6LWDlo8yAXJaAUM! This is most likely caused by:

Request to YouTube failed: R8mtJd4wUFI

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!
WARNING:root:Error extracting subtitles for video R8mtJd4wUFI. Retrying in 4.70 seconds. Error:
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=429 Client Error: Too Many Requests for url: https://www.google.com/sorry/index?continue=https://www.youtube.com/watch%3Fv%3DR8mtJd4wUFI&q=EgQiqF_7GPq6lbcGIjAbXTmkWPBB4MkQnI1NXYcrP17UrWbZbKIiKPhJaTYZnMXOUovndjDz0TJ9eLzvKGkyAXJaAUM! This is most likely caused by:

Request to YouTube failed: R8mtJd4wUFI

@bltnico
Copy link

bltnico commented Sep 23, 2024

I’ve been having the same problem since this weekend!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants