From 3e93811d93b2bc88f047e9a989b456ab3ae3291c Mon Sep 17 00:00:00 2001 From: Michael Weiss Date: Fri, 16 Jul 2021 12:14:45 +0200 Subject: [PATCH] chromium: get-commit-message.py: Improve the parsing The current stable release announcement [0] uses more HTML tags which broke the detection of "fixes" and "zero_days". Proper HTML parsing could be done using html.parser [1] but for our purposes the naive regex trick works well enough. [0]: https://chromereleases.googleblog.com/2021/07/stable-channel-update-for-desktop.html [1]: https://docs.python.org/3/library/html.parser.html --- .../networking/browsers/chromium/get-commit-message.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkgs/applications/networking/browsers/chromium/get-commit-message.py b/pkgs/applications/networking/browsers/chromium/get-commit-message.py index 2768e31bd032..7a91b74c83d5 100755 --- a/pkgs/applications/networking/browsers/chromium/get-commit-message.py +++ b/pkgs/applications/networking/browsers/chromium/get-commit-message.py @@ -19,14 +19,14 @@ for entry in feed.entries: continue url = requests.get(entry.link).url.split('?')[0] content = entry.content[0].value + content = html_tags.sub('', content) # Remove any HTML tags if re.search(r'Linux', content) is None: continue #print(url) # For debugging purposes version = re.search(r'\d+(\.\d+){3}', content).group(0) print('chromium: TODO -> ' + version) print('\n' + url) - if fixes := re.search(r'This update includes .+ security fixes\.', content): - fixes = html_tags.sub('', fixes.group(0)) + if fixes := re.search(r'This update includes .+ security fixes\.', content).group(0): zero_days = re.search(r'Google is aware( of reports)? that .+ in the wild\.', content) if zero_days: fixes += " " + zero_days.group(0)