Testing with regexes for npo.nl episodes

This commit is contained in:
Jan Hoek 2017-03-29 20:44:51 +02:00
parent 4a0b588f3e
commit 481d6638fd

21
testregex.py Normal file
View File

@@ -0,0 +1,21 @@
#!/usr/bin/python
import re
# Saved HTML pages that each pattern below is tried against.
npo12files = ['./jinek.htm', './midsomer.htm']
npo3files = ['./keuringsdienst.htm', './pownews.htm']

# Both patterns capture the link target in group 1 and, optionally, a
# trailing "program-not-available" div in group 2.
npo12regex = r"""<div class='span4'>\s*<div class='image-container'>\s*<a href="(.*?)">\s*(<div class="program-not-available">)?"""
npo3regex = r"""<div class='span4 image'>\s*<a href="(.*?)">\s*<div class="meta-container">\s*<div class="meta first">\s*<div class="md-label"><span class="npo-glyph triangle-right"></span></div>\s*<div class="md-value">.*?</div>\s*</div>\s*</div>\s*(<div class="program-not-available">)?"""


def find_episodes(regex, html):
    """Return one ``(href, available)`` tuple per match of *regex* in *html*.

    ``href`` is the text captured by group 1.  ``available`` is True when
    the optional group 2 (the "program-not-available" div) did not take
    part in the match, and False when it did.  An input without matches
    yields an empty list.
    """
    return [
        (match.group(1), match.group(2) is None)
        for match in re.finditer(regex, html)
    ]


def report(filenames, regex):
    """Print every match of *regex* found in each file of *filenames*.

    Each match is printed as ``href`` followed by the availability flag
    computed by ``find_episodes``.  A missing or unreadable file raises
    the exception from ``open``; it is not caught here.
    """
    for filename in filenames:
        # Read the whole page first so the file is closed before printing.
        with open(filename) as f:
            html = f.read()
        for href, available in find_episodes(regex, html):
            print(href, available)
        # NOTE(review): the blank separator is assumed to follow each file;
        # confirm against the original indentation.
        print('')


def main():
    """Run both patterns against their respective sample pages."""
    report(npo12files, npo12regex)
    report(npo3files, npo3regex)


# Guarded so that importing this module does not touch the filesystem.
if __name__ == '__main__':
    main()