From af0dbf7671f2a183bd5895b024705850efa46bf1 Mon Sep 17 00:00:00 2001 From: Martin Pitt Date: Sat, 3 Jan 2026 17:43:31 +0100 Subject: [PATCH] recipekeeper-split: Fix unescaped URLs --- recipekeeper-split | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/recipekeeper-split b/recipekeeper-split index a56f52e..c510408 100755 --- a/recipekeeper-split +++ b/recipekeeper-split @@ -33,6 +33,11 @@ content_fixed = re.sub(r'(]*)(?', r'\1 />', content) # Only match when the value is not already quoted content_fixed = re.sub(r'itemprop=([^\s">]+)', r'itemprop="\1"', content_fixed) +# 3. Fix unescaped ampersands in attribute values (& -> &amp;) +# This handles ampersands in URLs and other attribute values +# Only escape & that are not already part of an entity +content_fixed = re.sub(r'&(?!amp;|lt;|gt;|quot;|apos;|#)', r'&amp;', content_fixed) + # Parse as XML tree = ET.ElementTree(ET.fromstring(content_fixed)) root = tree.getroot() -- 2.47.3