diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..f843d4e --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,31 @@ +# SciSiteForge Scripts + +## 🛠️ Build & Translate + +This site framework supports offline multilingual translation using Llamafile. + +### Prerequisites +- Download a multilingual GGUF model (e.g., `mistral-7b-instruct.Q5_K_M.gguf`) +- Install [Llamafile](https://github.com/Mozilla-Ocho/llamafile) +- Python 3 with `requests` and `beautifulsoup4` + +### Steps +1. Launch Llamafile: + ```bash + ./mistral-7b-instruct.Q5_K_M.llamafile --port 8080 + ``` +2. Run translation: + ```bash + python scripts/translate_site.py --langs es,fr + ``` +3. Commit translated content: + ```bash + git add content/es/ content/fr/ + ``` + +> Translated files are saved to `content/es/`, `content/fr/`, etc. (under the `--dest` base directory, `content` by default), and served alongside English content. +``` + +#### 📁 `scripts/glossary_es.json` +→ Language-specific scientific term mappings + diff --git a/scripts/glossary_es.json b/scripts/glossary_es.json new file mode 100644 index 0000000..30e68f5 --- /dev/null +++ b/scripts/glossary_es.json @@ -0,0 +1,6 @@ +{ + "genetic drift": "deriva genética", + "natural selection": "selección natural", + "punctuated equilibrium": "equilibrio puntuado", + "allele": "alelo" +} diff --git a/scripts/translate_site.py b/scripts/translate_site.py new file mode 100644 index 0000000..3844e9e --- /dev/null +++ b/scripts/translate_site.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 +""" +Offline multilingual translation for evo-edu.org using Llamafile. 
+Requires: BeautifulSoup4, requests +Install with: pip install beautifulsoup4 requests +""" + +import os +import json +import argparse +import time +from pathlib import Path +from bs4 import BeautifulSoup, NavigableString +import requests + +# --- Configuration --- +MODEL_API_URL = "http://localhost:8080/completion" +LANGUAGES = { + "es": "Spanish", + "fr": "French", + "pt": "Portuguese", + "de": "German" +} + +def translate_text(text, target_lang_name, glossary=None): + """Translate a block of text using Llamafile.""" + if not text.strip(): + return text + + glossary_text = "" + if glossary: + glossary_text = "Use these translations:\n" + "\n".join(f"'{k}' → '{v}'" for k, v in glossary.items()) + "\n\n" + + prompt = f"""You are a scientific translator. Translate the following English text into {target_lang_name}. +Preserve technical terms like "genetic drift" or "natural selection" unless a standard translation exists. +Maintain paragraph structure. Do not add commentary. + +{glossary_text}Text: +"{text}" + +Translation:""" + + try: + response = requests.post(MODEL_API_URL, json={ + "prompt": prompt, + "temperature": 0.1, + "stop": ["\n\n", "Text:", "Translation:"], + "n_predict": 1024 + }, timeout=120) + response.raise_for_status() + result = response.json()["content"].strip() + return result + except Exception as e: + print(f" âš ï¸ Translation failed: {e}") + return text # fallback to original + +def extract_translatable_text(soup): + """Extract text nodes for translation, preserving structure.""" + texts = [] + for elem in soup.descendants: + if isinstance(elem, NavigableString) and elem.parent.name not in ['script', 'style']: + if elem.strip(): + texts.append(elem) + return texts + +def translate_html_file(src_path, dest_path, target_lang_code): + """Translate an HTML file.""" + print(f"Translating {src_path} → {dest_path}") + with open(src_path, 'r', encoding='utf-8') as f: + html = f.read() + + soup = BeautifulSoup(html, 'html.parser') + text_nodes = 
extract_translatable_text(soup) + + # Optional: load glossary for this language + glossary = {} + glossary_path = Path(__file__).parent / f"glossary_{target_lang_code}.json" + if glossary_path.exists(): + with open(glossary_path, 'r') as f: + glossary = json.load(f) + + # Translate each text node + for node in text_nodes: + original = str(node) + translated = translate_text(original, LANGUAGES[target_lang_code], glossary) + node.replace_with(translated) + time.sleep(0.1) # be gentle on CPU + + # Save translated HTML + dest_path.parent.mkdir(parents=True, exist_ok=True) + with open(dest_path, 'w', encoding='utf-8') as f: + f.write(str(soup)) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--langs", required=True, help="Comma-separated language codes (e.g., es,fr)") + parser.add_argument("--src", default="content/en", help="Source directory (English)") + parser.add_argument("--dest", default="content", help="Base destination directory") + args = parser.parse_args() + + lang_codes = args.langs.split(',') + src_base = Path(args.src) + dest_base = Path(args.dest) + + for lang_code in lang_codes: + if lang_code not in LANGUAGES: + print(f"Unsupported language: {lang_code}") + continue + + print(f"\n=== Translating to {LANGUAGES[lang_code]} ({lang_code}) ===") + for html_file in src_base.rglob("*.html"): + rel_path = html_file.relative_to(src_base) + dest_file = dest_base / lang_code / rel_path + translate_html_file(html_file, dest_file, lang_code) + + print("\n✅ Translation complete.") + +if __name__ == "__main__": + main()