From 7253770f0de6f48795f717369e343485d40e6c6e Mon Sep 17 00:00:00 2001
From: user706 <39215612+user706@users.noreply.github.com>
Date: Sun, 6 Jan 2019 22:24:22 +0100
Subject: [PATCH] [gramofononline] Add playlist extractor, language-prefixed URLs and webpage fallbacks

---
 youtube_dl/extractor/extractors.py     |   5 +-
 youtube_dl/extractor/gramofononline.py | 117 ++++++++++++++++++++-----
 2 files changed, 98 insertions(+), 24 deletions(-)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 637c56d67..d40bbde3e 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -439,7 +439,10 @@ from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .goshgay import GoshgayIE
 from .gputechconf import GPUTechConfIE
-from .gramofononline import GramofonOnlineIE
+from .gramofononline import (
+    GramofonOnlineIE,
+    GramofonOnlinePlaylistIE,
+)
 from .groupon import GrouponIE
 from .hark import HarkIE
 from .hbo import (
diff --git a/youtube_dl/extractor/gramofononline.py b/youtube_dl/extractor/gramofononline.py
index 409710baa..9814afc3d 100644
--- a/youtube_dl/extractor/gramofononline.py
+++ b/youtube_dl/extractor/gramofononline.py
@@ -2,12 +2,11 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-import re
-import json
+from ..utils import try_get
 
 
 class GramofonOnlineIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?gramofononline\.hu(/(listen.php\?.*track=)?(?P<id>[0-9]+))?'
+    _VALID_URL = r'https?://(?:www\.)?gramofononline\.hu/(?:hu/|en/|de/)?(?:listen\.php\?.*track=)?(?P<id>[0-9]+)'
 
     _TESTS = [{
         'url': 'https://gramofononline.hu/1401835664/papageno-duett',
@@ -18,6 +17,19 @@ class GramofonOnlineIE(InfoExtractor):
             'artist': 'Johanna Gadski, Otto Goritz, ismeretlen zenekar',
             'ext': 'mp3'
         }
+    }, {
+        # same as the previous test, but with the /en/ language prefix
+        'url': 'https://gramofononline.hu/en/1401835664/papageno-duett',
+        'md5': '1b4bcabde313f09cdd48c463b54d8125',
+        'info_dict': {
+            'id': '1401835664',
+            'title': 'Papageno-Duett ',
+            'artist': 'Johanna Gadski, Otto Goritz, ismeretlen zenekar',
+            'ext': 'mp3'
+        },
+        'params': {
+            'skip_download': True,
+        }
     }, {
         'url': 'https://gramofononline.hu/listen.php?autoplay=true&track=1401835664',
         'md5': '1b4bcabde313f09cdd48c463b54d8125',
@@ -30,36 +42,95 @@ class GramofonOnlineIE(InfoExtractor):
         'params': {
             'skip_download': True,
         }
+    }, {
+        # same as the previous test, but with the /en/ language prefix
+        'url': 'https://gramofononline.hu/en/listen.php?autoplay=true&track=1401835664',
+        'md5': '1b4bcabde313f09cdd48c463b54d8125',
+        'info_dict': {
+            'id': '1401835664',
+            'title': 'Papageno-Duett ',
+            'artist': 'Johanna Gadski, Otto Goritz, ismeretlen zenekar',
+            'ext': 'mp3'
+        },
+        'params': {
+            'skip_download': True,
+        }
     }]
 
-    def _get_entry(self, obj):
-        id1 = obj.get('id')
-        source = obj.get('source')
-        title = obj.get('name')
+    def _get_entry(self, obj, webpage):  # each field: trackList value first, then patterns scraped from webpage
+        id1 = (obj.get('id')
+               or self._search_regex(r'var\s*track=([^;]+);', webpage, 'id', default=None)
+               or self._search_regex(r'http://gramofononline\.hu/flash/loader\.swf\?id=(\w+)', webpage, 'id'))
+        url_suffix = (obj.get('source')
+                      or self._search_regex(r'/data\.php\?n=600&amp;fname=(\w+)', webpage, 'url_suffix', default=None)
+                      or self._search_regex(r'http://gramofononline\.hu/keyframe/go/midres/midres_(\w+)', webpage, 'url_suffix'))
+        title = (obj.get('name')  # default=None below keeps the og:title fallback reachable when <title> does not match
+                 or self._html_search_regex(r'<title>Gramofon Online / (.*)</title>', webpage, 'title', default=None)
+                 or self._og_search_title(webpage))
         artist = obj.get('artist')
-        return {
-            'id': id1,
-            'title': title,
-            'http_headers': {'Referer': 'https://gramofononline.hu/' + id1},
-            'artist': artist,
-            'thumbnail': 'https://gramofononline.hu/getImage.php?id=' + source,
-            'formats': [{
-                'url': 'https://gramofononline.hu/go/master/' + source + '.mp3',
-                'ext': 'mp3'
-            }, {
-                'url': 'https://gramofononline.hu/go/noise_reduction/' + source + '.mp3',
-                'ext': 'mp3'
-            }]
-        }
+        return get_gramofon_online_info_dict(id1, title, url_suffix, artist)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        m = re.search(r'var\s+trackList\s*=\s*(\[.*\]);', webpage)
-        lineobjs = json.loads(m.group(1))
+        json_tracklist = self._search_regex(r'var\s+trackList\s*=\s*(\[.*\]);', webpage, 'json_tracklist', default='[]')
+        lineobjs = self._parse_json(json_tracklist, video_id, fatal=False) or []  # missing/invalid trackList -> empty list
+        obj = try_get(lineobjs, lambda x: x[0], dict) or {}  # only the first trackList item is used; {} triggers the webpage fallbacks
+        return self._get_entry(obj, webpage)
+
+
+class GramofonOnlinePlaylistIE(InfoExtractor):  # site root / index.php "playradio" URLs -> playlist of all trackList items
+    _VALID_URL = r'https?://(?:www\.)?gramofononline\.hu(?:/(?:hu|en|de))?/?(?:index\.php\?.*playradio.*)?$'
+
+    _TESTS = [{
+        'url': 'https://gramofononline.hu',
+        'only_matching': True
+    }, {
+        'url': 'https://gramofononline.hu/',
+        'only_matching': True
+    }, {
+        'url': 'https://gramofononline.hu/en/',
+        'only_matching': True
+    }, {
+        'url': 'https://gramofononline.hu/index.php?playradio=ord%3D7%26w%3D2&autoplay=1',
+        'only_matching': True
+    }, {
+        'url': 'https://gramofononline.hu/en/index.php?playradio=ord%3D7%26w%3D2&autoplay=1',
+        'only_matching': True
+    }]
+
+    def _get_entry(self, obj):  # obj is one trackList item; id/source/name are required, artist is optional
+        id1 = obj['id']
+        url_suffix = obj['source']
+        title = obj['name']
+        artist = obj.get('artist')
+        return get_gramofon_online_info_dict(id1, title, url_suffix, artist)
+
+    def _real_extract(self, url):
+        webpage = self._download_webpage(url, url)  # _VALID_URL captures no id, so the URL itself is passed as the id
+
+        json_tracklist = self._search_regex(r'var\s+trackList\s*=\s*(\[.*\]);', webpage, 'json_tracklist')
+        lineobjs = self._parse_json(json_tracklist, url)
+
+        return {
+            '_type': 'playlist',
+            'entries': [self._get_entry(obj) for obj in lineobjs]
+        }
+
+
+def get_gramofon_online_info_dict(id1, title, url_suffix, artist):
+    # Build the info dict for one track; called by both extractors in this module.
+    mp3_name = url_suffix + '.mp3'
+    # Two mp3 URLs are offered per track: go/master and go/noise_reduction.
+    formats = [{'url': prefix + mp3_name, 'ext': 'mp3'} for prefix in (
+        'https://gramofononline.hu/go/master/',
+        'https://gramofononline.hu/go/noise_reduction/')]
+    return {
+        'id': id1,
+        'title': title,
+        'http_headers': {'Referer': 'https://gramofononline.hu/' + id1},
+        'artist': artist,
+        'thumbnail': 'https://gramofononline.hu/getImage.php?id=' + url_suffix,
+        'formats': formats,
+    }