#!/usr/bin/python3
"""Split a Recipe Keeper HTML export into one HTML file per recipe.

The export (``recipes.html`` in the current directory) is sloppy HTML: void
tags are not self-closed and some ``itemprop`` values are unquoted.  Both are
patched up with regular expressions so the document can be parsed as XML.
Every ``<div class="recipe-details">`` directly below ``<body>`` is then
written to its own ``<name>.html`` file together with a copy of the original
``<head>``.

Provenance: recovered from the patch "recipekeeper-split: Add"
(commit 312786e1d19b153a7797a1e0d675709c95208622, Martin Pitt,
Sun, 18 May 2025 21:22:16 +0200).

NOTE(review): the copy of the patch this file was recovered from had every
``<...>`` span stripped, which destroyed several string literals (the
tag-closing regex, the meta tag, the ``<head>`` pattern and the output
wrapper).  They are reconstructed below from the surviving fragments and
comments -- confirm them against the upstream script.
"""
import html
import re
import xml.etree.ElementTree as ET

input_file = "recipes.html"

# Matches an HTML void element that is not already self-closed.  The surviving
# fragment of the original pattern was ``(`` ... ``]*)(?`` ... which fits
# ``(<TAG\b[^>]*)(?<!/)>``; the tag name itself was lost, so all void elements
# are covered (closing any of them is required for XML parsing anyway).
# NOTE(review): the original most likely handled only <meta> -- confirm.
VOID_TAG_RE = re.compile(
    r'(<(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)'
    r'\b[^>]*)(?<!/)>',
    re.IGNORECASE,
)

# itemprop=foo -> itemprop="foo"; a leading quote is excluded by the character
# class, so already-quoted values are left alone.
UNQUOTED_ITEMPROP_RE = re.compile(r'itemprop=([^\s">]+)')

# Opening <head> tag, with possible attributes.
HEAD_OPEN_RE = re.compile(r'(<head\b[^>]*>)', re.IGNORECASE | re.DOTALL)

# Tag inserted right after the opening <head> tag of every output file.
# NOTE(review): reconstructed (the literal was stripped to ''); a charset
# declaration is assumed because the output files are written as UTF-8.
meta_tag = '<meta charset="utf-8">'


def safe_filename(name):
    """Return *name* reduced to a file-name-safe string of at most 40 chars.

    Everything except word characters, whitespace and ``-`` is dropped, the
    result is stripped, and each remaining whitespace character becomes ``_``
    (the original only replaced spaces, which let tabs and newlines through
    into file names).  The result may be empty.
    """
    cleaned = re.sub(r"[^\w\s-]", "", name).strip()
    return re.sub(r"\s", "_", cleaned)[:40]


def fix_markup(content):
    """Return *content* patched up so that it parses as XML.

    1. Void tags are made self-closing.
    2. Unquoted ``itemprop`` attribute values get quoted.
    """
    content = VOID_TAG_RE.sub(r"\1 />", content)
    return UNQUOTED_ITEMPROP_RE.sub(r'itemprop="\1"', content)


def recipe_basename(recipe):
    """Return the output file name (without extension) for a recipe element.

    Preference order: the ``<h2 itemprop="name">`` title, then the ``content``
    of ``<meta itemprop="recipeId">``, then the literal ``"recipe"``.  Both
    candidates go through safe_filename() and are skipped when that leaves
    nothing, so the result is never empty.
    """
    h2 = recipe.find('.//h2[@itemprop="name"]')
    if h2 is not None and h2.text and h2.text.strip():
        base = safe_filename(html.unescape(h2.text))
        if base:
            return base

    meta_id = recipe.find('.//meta[@itemprop="recipeId"]')
    if meta_id is not None:
        # .get() instead of attrib[...]: a meta tag without content must not
        # abort the whole run.
        base = safe_filename(meta_id.get("content", ""))
        if base:
            return base

    return "recipe"


def unique_filename(base, used):
    """Return ``<base>.html``, suffixed with ``_2``, ``_3``, ... if taken.

    *used* is the set of names handed out so far; the returned name is added
    to it.  This keeps same-titled recipes from overwriting each other.
    """
    candidate = f"{base}.html"
    counter = 1
    while candidate in used:
        counter += 1
        candidate = f"{base}_{counter}.html"
    used.add(candidate)
    return candidate


def render_head(head):
    """Serialize *head* as HTML with ``meta_tag`` inserted after ``<head>``.

    *head* may be ``None`` (input without a head element), in which case a
    minimal head holding only the meta tag is returned.
    """
    if head is None:
        return "<head>\n" + meta_tag + "</head>"
    # method="html": empty non-void elements keep their closing tag instead of
    # being collapsed to "<tag />", which HTML parsers read as an open tag.
    head_str = ET.tostring(head, encoding="unicode", method="html")
    # Function replacement so meta_tag is never interpreted as a template.
    return HEAD_OPEN_RE.sub(
        lambda m: m.group(1) + "\n" + meta_tag, head_str, count=1
    )


def split_recipes(content):
    """Split the export text *content* into per-recipe HTML documents.

    Returns a list of ``(filename, html_text)`` pairs in document order.

    Raises:
        ValueError: the document has no ``<body>`` element.
        xml.etree.ElementTree.ParseError: the patched markup is still not
            well-formed XML.
    """
    root = ET.fromstring(fix_markup(content))

    body = root.find("body")
    if body is None:
        raise ValueError("no <body> element found in input")

    # The head is identical for every recipe, so render it once.
    head_str = render_head(root.find("head"))

    pages = []
    used = set()
    for recipe in body.findall("div"):
        if recipe.get("class") != "recipe-details":
            continue
        filename = unique_filename(recipe_basename(recipe), used)
        # NOTE(review): the wrapper literals were stripped down to bare "\n"
        # in the recovered patch; this is the reconstruction.
        out_html = (
            "<!DOCTYPE html>\n<html>\n"
            + head_str + "\n"
            + "<body>\n"
            + ET.tostring(recipe, encoding="unicode", method="html") + "\n"
            + "</body></html>\n"
        )
        pages.append((filename, out_html))
    return pages


def main():
    """Read ``input_file`` and write one HTML file per recipe to the cwd."""
    # Explicit encoding to match the UTF-8 output; "utf-8-sig" additionally
    # drops a leading byte-order mark, which would otherwise break XML parsing.
    with open(input_file, "r", encoding="utf-8-sig") as f:
        content = f.read()

    for filename, out_html in split_recipes(content):
        with open(filename, "w", encoding="utf-8") as out:
            out.write(out_html)
        print(f"Wrote: {filename}")


if __name__ == "__main__":
    main()