Expand PlaylistDataCache to automatically fetch missing metadata

This is only used if the constructor kwarg "auto_fetch_data" evaluates to True; see the usage sketch below.
Peder Bergebakken Sundt 2018-03-05 23:13:36 +01:00
parent 33be49d2d3
commit a9129b197a
4 changed files with 74 additions and 6 deletions
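
Roughly how the new flag is meant to be used, mirroring the api.py and server wiring in the diff below. The import path and the surrounding setup here are assumptions for illustration, not part of the commit:

import asyncio
from .playlistdatacache import PlaylistDataCache  # hypothetical import path

PLAYLIST_DATA_CACHE = PlaylistDataCache(auto_fetch_data=True)  # opt in to background fetching
asyncio.ensure_future(PLAYLIST_DATA_CACHE.run())               # start consuming the fetch-job queue
# With the default auto_fetch_data=False, run() returns immediately and
# add_data_to_playlist() simply yields items without queueing any fetches.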

View File

@@ -45,6 +45,7 @@ def make_sanic_app(host="0.0.0.0", port=8080):
print("mpv is no longer running. Stopping Sanic...")
app.stop()
asyncio.ensure_future(runMPVControl())
asyncio.ensure_future(api.PLAYLIST_DATA_CACHE.run())
return loop, app

View File

@@ -39,7 +39,7 @@ def response_text(func):
class APIError(Exception): pass
PLAYLIST_DATA_CACHE = PlaylistDataCache()
PLAYLIST_DATA_CACHE = PlaylistDataCache(auto_fetch_data=True)
#routes:
@bp.get("")

View File

@@ -1,7 +1,53 @@
from urllib.parse import urlsplit, urlunsplit, parse_qs, urlencode
import youtube_dl
from youtube_dl.utils import DownloadError
from . import nyasync

@nyasync.ify
def title(url):
    ydl = youtube_dl.YoutubeDL()
    return ydl.extract_info(url, download=False).get('title')
def filter_query_params(url, allowed=[]):
    split_url = urlsplit(url)
    qs = parse_qs(split_url.query)
    print(qs)
    for key in list(qs.keys()):
        if key not in allowed:
            del qs[key]
    return urlunsplit((
        split_url.scheme,
        split_url.netloc,
        split_url.path,
        urlencode(qs, doseq=True),
        split_url.fragment,
    ))

@nyasync.ify
def get_youtube_dl_metadata(url, ydl = youtube_dl.YoutubeDL()):
    if urlsplit(url).scheme == "":
        return None
    if urlsplit(url).netloc.lower() in ("www.youtube.com", "youtube.com", "youtu.be"):
        #Stop it from doing the whole playlist
        url = filter_query_params(url, allowed=["v"])
    if urlsplit(url).scheme == "ytdl":
        url = f"https://youtube.com/watch?v={urlsplit(url).netloc}"
    try:
        resp = ydl.extract_info(url, download=False)
    except DownloadError:
        return None
    #filter and return:
    return {k:v for k, v in resp.items() if k in
            ("uploader", "title", "thumbnail", "description", "duration")}

async def get_metadata(url):
    data = await get_youtube_dl_metadata(url)
    if data is None:
        # (TODO): local ID3 tags
        return {"failed":True}
    return data
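
For illustration, a hypothetical input/output pair for the two URL transformations above; the video id and playlist parameters are made up and nothing here is part of the commit:

filter_query_params("https://www.youtube.com/watch?v=abc123xyz&list=PLfoo&index=3", allowed=["v"])
# -> "https://www.youtube.com/watch?v=abc123xyz"
# Dropping "list" and "index" keeps youtube-dl from resolving the whole playlist.

# The ytdl:// shorthand is expanded before extraction:
# "ytdl://abc123xyz" -> "https://youtube.com/watch?v=abc123xyz"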

View File

@@ -1,12 +1,27 @@
import asyncio
from .metadatafetch import get_metadata
from . import nyasync

#Used in api.playlist_get() and api.loadfile()
class PlaylistDataCache:
    def __init__(self):
    def __init__(self, auto_fetch_data = False):
        self.filepath_data_map = {}
        self.auto_fetch_data = auto_fetch_data
        self.jobs = None

    def add_data(self, filepath, data=None):
        if data:
            self.filepath_data_map[filepath] = data

    async def run(self):
        if not self.auto_fetch_data: return
        self.jobs = nyasync.Queue()
        async for filename in self.jobs:
            print("Fetching metadata for ", repr(filename))
            data = await get_metadata(filename)
            #might already be gone by this point:
            if filename in self.filepath_data_map:
                self.filepath_data_map[filename].update(data)
                del self.filepath_data_map[filename]["fetching"]

    def add_data_to_playlist(self, playlist):
        seen = set()
@@ -18,10 +33,16 @@ class PlaylistDataCache:
new_item["data"] = self.filepath_data_map[item["filename"]]
yield new_item
continue
elif self.auto_fetch_data:
self.filepath_data_map[item["filename"]] = {"fetching":True}
self.jobs.put_nowait(item["filename"])
new_item = item.copy()
new_item["data"] = {"fetching":True}
yield new_item
continue
yield item
not_seen = set(self.filepath_data_map.keys()) - seen
for name in not_seen:
del self.filepath_data_map[name]
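
Sketching the resulting fetch cycle, using the class and imports from the file above. This assumes nyasync.Queue is an asyncio-style queue that supports "async for"; the playlist item and the sleep durations are made up:

async def demo():
    cache = PlaylistDataCache(auto_fetch_data=True)
    asyncio.ensure_future(cache.run())  # run() creates cache.jobs, then idles on the queue
    await asyncio.sleep(0)              # let run() start so cache.jobs exists

    playlist = [{"filename": "https://youtube.com/watch?v=abc123xyz"}]
    first = list(cache.add_data_to_playlist(playlist))
    # first[0]["data"] == {"fetching": True} and the filename is now queued

    await asyncio.sleep(5)              # stand-in for "some time later"
    later = list(cache.add_data_to_playlist(playlist))
    # later[0]["data"] should now hold uploader/title/thumbnail/description/duration,
    # or {"failed": True} if youtube-dl could not extract anything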