# Only match when the value is not already quoted
content_fixed = re.sub(r'itemprop=([^\s">]+)', r'itemprop="\1"', content_fixed)
+# 3. Fix unescaped ampersands in attribute values (& -> &)
+# This handles ampersands in URLs and other attribute values
+# Only escape & that are not already part of an entity
+content_fixed = re.sub(r'&(?!amp;|lt;|gt;|quot;|apos;|#)', r'&', content_fixed)
+
# Parse as XML
tree = ET.ElementTree(ET.fromstring(content_fixed))
root = tree.getroot()