From 223260fdd69a43125c905fd9b50006004a8ea63a Mon Sep 17 00:00:00 2001
From: vccalvin33
Date: Sat, 21 May 2022 03:01:37 +0700
Subject: [PATCH 1/2] modify comments.py, add video_stats, add video_ids, fix
 some error issues

---
 utils/comments.py |  17 +++-
 yt_public.py      | 247 ++++++++++++++++++++++++++++++++++++----------
 2 files changed, 207 insertions(+), 57 deletions(-)

diff --git a/utils/comments.py b/utils/comments.py
index b612b64..782f345 100644
--- a/utils/comments.py
+++ b/utils/comments.py
@@ -1,10 +1,11 @@
 import csv
 from datetime import datetime as dt
 
-comments = []
 today = dt.today().strftime('%d-%m-%Y')
+PATH = 'commentsFolder/'
 
 def process_comments(response_items, csv_output=False):
+    comments = []
 
     for res in response_items:
 
@@ -29,13 +30,19 @@ def process_comments(response_items, csv_output=False):
 
     return comments
 
-def make_csv(comments, channelID=None):
+def make_csv(comments, channelID=None, videoID=None):
+    # Handle the case of a video with 0 comments
+    if len(comments) == 0:
+        return
+
     header = comments[0].keys()
 
-    if channelID:
-        filename = f'comments_{channelID}_{today}.csv'
+    if channelID and videoID:
+        filename = f'{PATH}comments_{channelID}_{videoID}_{today}.csv'
+    elif channelID:
+        filename = f'{PATH}comments_{channelID}_{today}.csv'
     else:
-        filename = f'comments_{today}.csv'
+        filename = f'{PATH}comments_{today}.csv'
 
     with open(filename, 'w', encoding='utf8', newline='') as f:
         writer = csv.DictWriter(f, fieldnames=header)
diff --git a/yt_public.py b/yt_public.py
index 3fda378..069ca12 100644
--- a/yt_public.py
+++ b/yt_public.py
@@ -1,15 +1,28 @@
 import os
+import csv
+from datetime import datetime as dt
+from urllib import response
 from dotenv import load_dotenv
 from googleapiclient.discovery import build
 from utils.comments import process_comments, make_csv
 
 load_dotenv()
-API_KEY = os.getenv("API_KEY")
+API_KEY_1 = os.getenv("API_KEY_1")
+API_KEY_2 = os.getenv("API_KEY_2")
+API_KEY_3 = os.getenv("API_KEY_3")
+API_KEY_4 = os.getenv("API_KEY_4")
+API_KEY_5 = os.getenv("API_KEY_5")
 
-youtube = build("youtube", "v3", developerKey=API_KEY)
+youtube_1 = build("youtube", "v3", developerKey=API_KEY_1)
+youtube_2 = build("youtube", "v3", developerKey=API_KEY_2)
+youtube_3 = build("youtube", "v3", developerKey=API_KEY_3)
+youtube_4 = build("youtube", "v3", developerKey=API_KEY_4)
+youtube_5 = build("youtube", "v3", developerKey=API_KEY_5)
 
-def search_result(query):
+scraped_videos = {}
+
+def search_result(youtube, query):
     """
     Refer to the documentation: https://googleapis.github.io/google-api-python-client/docs/dyn/youtube_v3.search.html
     """
     request = youtube.search().list(
@@ -21,87 +34,217 @@ def search_result(query):
     return request.execute()
 
-def channel_stats(channelID):
+def get_video_ids(youtube, channelId):
     """
-    Refer to the documentation: https://googleapis.github.io/google-api-python-client/docs/dyn/youtube_v3.channels.html
+    Refer to the documentation: https://googleapis.github.io/google-api-python-client/docs/dyn/youtube_v3.search.html
     """
+    videoIds = []
+
     request = youtube.channels().list(
-        part="statistics",
-        id=channelID
+        part="contentDetails",
+        id=channelId
     )
-    return request.execute()
 
-def comment_threads(channelID, to_csv=False):
-
-    comments_list = []
-
-    request = youtube.commentThreads().list(
-        part='id,replies,snippet',
-        videoId=channelID,
+    response = request.execute()
+
+    playlistId = response['items'][0]['contentDetails']['relatedPlaylists']['uploads']
+
+    request = youtube.playlistItems().list(
+        part="contentDetails",
+        playlistId=playlistId,
+        maxResults=50
     )
-    response = request.execute()
-    comments_list.extend(process_comments(response['items']))
+    response = request.execute()
+
+    responseItems = response['items']
+
+    videoIds.extend([item['contentDetails']['videoId'] for item in responseItems])
 
     # if there is nextPageToken, then keep calling the API
     while response.get('nextPageToken', None):
-        request = youtube.commentThreads().list(
-            part='id,replies,snippet',
-            videoId=channelID,
+        print(f'Fetching next page of videos for {channelId}_{playlistId}')
+        request = youtube.playlistItems().list(
+            part="contentDetails",
+            playlistId=playlistId,
+            maxResults=50,
             pageToken=response['nextPageToken']
         )
         response = request.execute()
-        comments_list.extend(process_comments(response['items']))
+        responseItems = response['items']
+
+        videoIds.extend([item['contentDetails']['videoId'] for item in responseItems])
 
-    print(f"Finished fetching comments for {channelID}. {len(comments_list)} comments found.")
+    print(f"Finished fetching videoIds for {channelId}. {len(videoIds)} videos found.")
+
+    return videoIds
+
+def channel_stats(youtube, channelIDs, to_csv=False):
+    """
+    Refer to the documentation: https://googleapis.github.io/google-api-python-client/docs/dyn/youtube_v3.channels.html
+    """
+    if type(channelIDs) == str:
+        channelIDs = [channelIDs]
+
+    stats_list = []
+
+    for channelId in channelIDs:
+        request = youtube.channels().list(
+            part="statistics",
+            id=channelId
+        )
+        response = request.execute()
+        response = response['items'][0]['statistics']
+        response['channelId'] = channelId
+
+        stats_list.append(response)
+
+    if to_csv:
+        header = stats_list[0].keys()
+        with open('channelStats.csv', 'w') as f:
+            writer = csv.DictWriter(f, fieldnames=header)
+            writer.writeheader()
+            writer.writerows(stats_list)
+
+    return stats_list
+
+def video_stats(youtube, videoIDs, channelID, to_csv=False):
+    if type(videoIDs) == str:
+        videoIDs = [videoIDs]
+
+    stats_list = []
+
+    for videoId in videoIDs:
+        request = youtube.videos().list(
+            part="snippet,statistics,contentDetails",
+            id=videoId
+        )
+        response = request.execute()
+        statistics = response['items'][0]['statistics']
+        snippet = response['items'][0]['snippet']
+        statistics['videoId'] = videoId
+        statistics['title'] = snippet['title']
+        statistics['description'] = snippet['description']
+        statistics['publishedAt'] = snippet['publishedAt']
+        statistics['duration'] = response['items'][0]['contentDetails']['duration']
+        statistics['thumbnail'] = snippet['thumbnails']['high']['url']
+        statistics['channelId'] = channelID
+
+        if statistics.get('likeCount', None) == None:
+            statistics['likeCount'] = 0
+
+        print(f"Fetched stats for {videoId}")
+        stats_list.append(statistics)
 
     if to_csv:
-        make_csv(comments_list, channelID)
+        header = stats_list[0].keys()
+        with open(f'videosFolder/videoStats_{channelID}.csv', 'w', encoding='utf8', newline='') as f:
+            writer = csv.DictWriter(f, fieldnames=header)
+            writer.writeheader()
+            writer.writerows(stats_list)
 
-    return comments_list
+    print(f'Successfully fetched video stats for {channelID}')
+    return stats_list
 
-def get_video_ids(channelId):
-    """
-    Refer to the documentation: https://googleapis.github.io/google-api-python-client/docs/dyn/youtube_v3.search.html
-    """
-    videoIds = []
-
-    request = youtube.search().list(
-        part="snippet",
-        channelId=channelId,
-        type="video",
-        maxResults=50,
-        order="date"
-    )
-    response = request.execute()
-    responseItems = response['items']
+def comment_threads(youtube, videoID, channelID=None, to_csv=False):
+
+    comments_list = []
+
+    try:
+        request = youtube.commentThreads().list(
+            part='id,replies,snippet',
+            videoId=videoID,
+        )
+        response = request.execute()
+    except Exception as e:
+        print(f'Error fetching comments for {videoID} - error: {e}')
+        if scraped_videos.get('error_ids', None):
+            scraped_videos['error_ids'].append(videoID)
+        else:
+            scraped_videos['error_ids'] = [videoID]
+        return
 
-    videoIds.extend([item['id']['videoId'] for item in responseItems if item['id'].get('videoId', None) != None])
+    comments_list.extend(process_comments(response['items']))
 
     # if there is nextPageToken, then keep calling the API
     while response.get('nextPageToken', None):
-        request = youtube.search().list(
-            part="snippet",
-            channelId=channelId,
+        request = youtube.commentThreads().list(
+            part='id,replies,snippet',
+            videoId=videoID,
             pageToken=response['nextPageToken']
         )
         response = request.execute()
-        responseItems = response['items']
+        comments_list.extend(process_comments(response['items']))
+
+    print(f"Finished fetching comments for {videoID}. {len(comments_list)} comments found.")
+
+    if to_csv:
+        try:
+            make_csv(comments_list, channelID, videoID)
+        except Exception as e:
+            print(f'Error writing comments to csv for {videoID} - error: {e}')
+            if scraped_videos.get('error_csv_ids', None):
+                scraped_videos['error_csv_ids'].append(videoID)
+            else:
+                scraped_videos['error_csv_ids'] = [videoID]
+            return
+
+    if scraped_videos.get(channelID, None):
+        scraped_videos[channelID].append(videoID)
+    else:
+        scraped_videos[channelID] = [videoID]
+
+    return comments_list
 
-    videoIds.extend([item['id']['videoId'] for item in responseItems if item['id'].get('videoId', None) != None])
+if __name__ == '__main__':
+    pyscriptVidId = 'Qo8dXyKXyME'
+    channelId = 'UCzIxc8Vg53_ewaRIk3shBug'
+
+    channelIds = []
+    # with open('youtube.csv', 'r') as csvfile:
+    #     reader = csv.reader(csvfile)
+    #     next(reader)
+    #     for row in reader:
+    #         channelIds.append(row[2])
+
+    # channel_stats(youtube_2, channelIds, to_csv=True)
+
+    # videoDict = {}
+    # for idx in range(len(channelIds)):
+    #     if idx <= len(channelIds)/5:
+    #         youtube = youtube_1
+    #     elif idx <= len(channelIds)/5*2:
+    #         youtube = youtube_2
+    #     elif idx <= len(channelIds)/5*3:
+    #         youtube = youtube_3
+    #     elif idx <= len(channelIds)/5*4:
+    #         youtube = youtube_4
+    #     else:
+    #         youtube = youtube_5
+    #     videoIds = get_video_ids(youtube, channelIds[idx])
+    #     videoDict[channelIds[idx]] = videoIds
 
-    print(f"Finished fetching videoIds for {channelId}. {len(videoIds)} videos found.")
 
-    return videoIds
+    import json
+    # with open('videoDict.json', 'w') as fp:
+    #     json.dump(videoDict, fp)
 
-if __name__ == '__main__':
-    pyscriptVidId = 'Qo8dXyKXyME'
-    channelId = 'UCzIxc8Vg53_ewaRIk3shBug'
+    with open('videoDict.json', 'r') as fp:
+        videoDict = json.load(fp)
+
+    for channelId, videoIds in videoDict.items():
+        video_stats(youtube_3, videoIDs=videoIds, channelID=channelId, to_csv=True)
+
+    # for channelId, videoIds in videoDict.items():
+    #     for videoId in videoIds:
+    #         comment_threads(youtube_3, videoID=videoId, channelID=channelId, to_csv=True)
+
+    # with open('scrapedVideos.json', 'w') as fp:
+    #     json.dump(scraped_videos, fp)
 
     # response = search_result("pyscript")
-    response = channel_stats(channelId)
-    # response = comment_threads(pyscriptVidId, to_csv=True)
+    # response = channel_stats(youtube_2, channelId)
+    # response = comment_threads(youtube_2, videoID='pnecPXlfR5U', to_csv=False)
+    # print(response)
 
-    print(response)
+    #NOTES -> troublesome video (videoId: CqssnS_v1a4) -> no likeCount, so I manually add 0 to it
\ No newline at end of file

From fc9d9d93be02849b884e2bf238fca2abadd87caf Mon Sep 17 00:00:00 2001
From: vccalvin33
Date: Sat, 21 May 2022 15:56:40 +0700
Subject: [PATCH 2/2] make videoStats func cleaner

---
 yt_public.py | 54 ++--------------------------------------------------
 1 file changed, 2 insertions(+), 52 deletions(-)

diff --git a/yt_public.py b/yt_public.py
index 069ca12..4e88a51 100644
--- a/yt_public.py
+++ b/yt_public.py
@@ -128,9 +128,7 @@ def video_stats(youtube, videoIDs, channelID, to_csv=False):
         statistics['duration'] = response['items'][0]['contentDetails']['duration']
         statistics['thumbnail'] = snippet['thumbnails']['high']['url']
         statistics['channelId'] = channelID
-
-        if statistics.get('likeCount', None) == None:
-            statistics['likeCount'] = 0
+        statistics['likeCount'] = statistics.get('likeCount', 0)
 
         print(f"Fetched stats for {videoId}")
         stats_list.append(statistics)
@@ -199,52 +197,4 @@ def comment_threads(youtube, videoID, channelID=None, to_csv=False):
 
 if __name__ == '__main__':
     pyscriptVidId = 'Qo8dXyKXyME'
-    channelId = 'UCzIxc8Vg53_ewaRIk3shBug'
-
-    channelIds = []
-    # with open('youtube.csv', 'r') as csvfile:
-    #     reader = csv.reader(csvfile)
-    #     next(reader)
-    #     for row in reader:
-    #         channelIds.append(row[2])
-
-    # channel_stats(youtube_2, channelIds, to_csv=True)
-
-    # videoDict = {}
-    # for idx in range(len(channelIds)):
-    #     if idx <= len(channelIds)/5:
-    #         youtube = youtube_1
-    #     elif idx <= len(channelIds)/5*2:
-    #         youtube = youtube_2
-    #     elif idx <= len(channelIds)/5*3:
-    #         youtube = youtube_3
-    #     elif idx <= len(channelIds)/5*4:
-    #         youtube = youtube_4
-    #     else:
-    #         youtube = youtube_5
-    #     videoIds = get_video_ids(youtube, channelIds[idx])
-    #     videoDict[channelIds[idx]] = videoIds
-
-    import json
-    # with open('videoDict.json', 'w') as fp:
-    #     json.dump(videoDict, fp)
-
-    with open('videoDict.json', 'r') as fp:
-        videoDict = json.load(fp)
-
-    for channelId, videoIds in videoDict.items():
-        video_stats(youtube_3, videoIDs=videoIds, channelID=channelId, to_csv=True)
-
-    # for channelId, videoIds in videoDict.items():
-    #     for videoId in videoIds:
-    #         comment_threads(youtube_3, videoID=videoId, channelID=channelId, to_csv=True)
-
-    # with open('scrapedVideos.json', 'w') as fp:
-    #     json.dump(scraped_videos, fp)
-
-    # response = search_result("pyscript")
-    # response = channel_stats(youtube_2, channelId)
-    # response = comment_threads(youtube_2, videoID='pnecPXlfR5U', to_csv=False)
-    # print(response)
-
-    #NOTES -> troublesome video (videoId: CqssnS_v1a4) -> no likeCount, so I manually add 0 to it
\ No newline at end of file
+    channelId = 'UCzIxc8Vg53_ewaRIk3shBug'
\ No newline at end of file