# Reconstructed from the patch "Fix generation of forum profile URLs"
# (fixes #196) against app/tasks/phpbbparser.py.
import os.path
import re
import time
import urllib.request
import urllib.parse as urlparse
from urllib.parse import urlencode


def urlEncodeNonAscii(b):
    """Percent-escape every character of *b* in the range U+0080..U+00FF.

    Each matching character is replaced by ``%xx``, where ``xx`` is the
    lowercase two-digit hex of its code point. All other characters,
    including those above U+00FF, are returned unchanged.
    """
    return re.sub('[\x80-\xFF]', lambda c: '%%%02x' % ord(c.group(0)), b)


def getProfileURL(url, username):
    """Return the phpBB ``memberlist.php`` profile URL for *username*.

    Args:
        url: Base URL of the forum, with or without a trailing slash. Any
            path component (a board installed in a sub-directory) is kept
            as a prefix of ``memberlist.php``.
        username: Forum user name. It is percent-encoded by ``urlencode``,
            so non-ASCII names and spaces are handled.

    Returns:
        The absolute URL as a string. Non-blank query arguments already
        present in *url* are preserved; ``un`` and ``mode`` are set or
        overwritten.
    """
    parts = urlparse.urlparse(url)

    # Append to the forum's base path rather than overwriting it, so that
    # "https://host/forum" yields "/forum/memberlist.php". Stripping the
    # trailing slashes avoids producing "//memberlist.php".
    profile_path = parts.path.rstrip("/") + "/memberlist.php"

    # Merge with any existing query arguments; ours take precedence.
    query = dict(urlparse.parse_qsl(parts.query))
    query.update({"un": username, "mode": "viewprofile"})

    parts = parts._replace(path=profile_path, query=urlencode(query))
    return urlparse.urlunparse(parts)