--- /dev/null
+#!/usr/bin/python3
+import re
+import html
+import xml.etree.ElementTree as ET
+
+# Path of the combined recipes page; the script reads it, repairs it enough
+# to parse as XML and writes one HTML file per recipe found in it.
+input_file = "recipes.html"
+
+
+def safe_filename(name):
+    """Turn arbitrary text (e.g. a recipe title) into a short file stem.
+
+    Characters other than word characters, whitespace and hyphens are
+    dropped, surrounding whitespace is trimmed, every remaining whitespace
+    character (space, tab, newline, ...) becomes an underscore, and the
+    result is cut to at most 40 characters.
+
+    Args:
+        name: The text to sanitize.
+
+    Returns:
+        The sanitized stem. It is the empty string when ``name`` contains
+        no usable characters, so callers need their own fallback.
+    """
+    kept = re.sub(r"[^\w\s-]", "", name).strip()
+    # The filter above keeps *all* whitespace, so replacing only ' ' would
+    # let tabs and newlines end up inside the file name.
+    return re.sub(r"\s", "_", kept)[:40]
+
+
+# Read with an explicit encoding instead of the locale default: the pages
+# written below are UTF-8 and declare that charset.
+# NOTE(review): assumes recipes.html itself is UTF-8 - confirm.
+with open(input_file, "r", encoding="utf-8") as f:
+    content = f.read()
+
+# 1. Fix <meta ...> tags: make them self-closing so the XML parser accepts
+#    them (tags that already end in "/>" are left alone by the lookbehind).
+content_fixed = re.sub(r'(<meta\b[^>]*)(?<!/)>', r'\1 />', content)
+
+# 2. Fix unquoted itemprop attribute values (itemprop=foo -> itemprop="foo").
+#    Values that are already quoted - with either " or ' - are not touched.
+content_fixed = re.sub(
+    r'''itemprop=([^\s"'>]+)''', r'itemprop="\1"', content_fixed
+)
+
+# Parse as XML
+tree = ET.ElementTree(ET.fromstring(content_fixed))
+root = tree.getroot()
+
+head = root.find('head')
+body = root.find('body')
+if head is None or body is None:
+    # Fail with a readable message instead of an AttributeError further down.
+    raise SystemExit(
+        f"{input_file}: expected <head> and <body> directly under the root element"
+    )
+
+# Direct <div> children of <body> carrying the "recipe-details" class; the
+# class attribute may list additional classes as well.
+recipes = [
+    div for div in body.findall('div')
+    if 'recipe-details' in (div.get('class') or '').split()
+]
+
+# Prepare the <meta http-equiv="content-type"...> tag string
+meta_tag = '<meta http-equiv="content-type" content="text/html; charset=utf-8" />'
+
+# The <head> markup is the same for every output page, so serialize it once
+# and insert the meta tag right after the opening <head ...> tag.
+# NOTE(review): ET.tostring() uses XML serialization here, so empty elements
+# are written self-closed (e.g. <div />); confirm that is acceptable for
+# pages that declare text/html.
+head_str = ET.tostring(head, encoding="unicode")
+head_str = re.sub(
+    r'(<head.*?>)',          # Match opening <head> tag (with possible attributes)
+    r'\1\n' + meta_tag,      # Insert meta tag right after opening tag
+    head_str,
+    count=1,
+    flags=re.IGNORECASE | re.DOTALL,
+)
+
+# File names already written in this run, used to avoid silent overwrites.
+used_names = set()
+
+for recipe in recipes:
+    h2 = recipe.find('.//h2[@itemprop="name"]')
+    meta_id = recipe.find('.//meta[@itemprop="recipeId"]')
+
+    # Preferred name: the recipe title; then the recipe id; then a constant.
+    # Each candidate is sanitized because it comes from the input document
+    # and is used as a path, and a candidate that sanitizes to nothing
+    # falls through to the next one.
+    base = ""
+    if h2 is not None and h2.text:
+        base = safe_filename(html.unescape(h2.text))
+    if not base and meta_id is not None:
+        base = safe_filename(meta_id.get('content', ''))
+    if not base:
+        base = "recipe"
+
+    # Two recipes may share a name; add _2, _3, ... rather than overwrite.
+    filename = f"{base}.html"
+    suffix = 1
+    while filename in used_names:
+        suffix += 1
+        filename = f"{base}_{suffix}.html"
+    used_names.add(filename)
+
+    out_html = (
+        '<!DOCTYPE html>\n<html>\n' +
+        head_str + '\n' +
+        '<body>\n' +
+        ET.tostring(recipe, encoding="unicode") + '\n' +
+        '</body>\n</html>'
+    )
+    with open(filename, "w", encoding="utf-8") as out:
+        out.write(out_html)
+    print(f"Wrote: {filename}")