Initial scripts/ files
This commit is contained in:
parent
b59a3884a3
commit
6648269557
|
|
@ -0,0 +1,31 @@
|
|||
# SciSiteForge Scripts
|
||||
|
||||
## 🛠️ Build & Translate
|
||||
|
||||
This site framework supports offline multilingual translation using Llamafile.
|
||||
|
||||
### Prerequisites
|
||||
- Download a multilingual GGUF model (e.g., `mistral-7b-instruct.Q5_K_M.gguf`)
|
||||
- Install [Llamafile](https://github.com/Mozilla-Ocho/llamafile)
|
||||
- Python 3 with `requests` and `beautifulsoup4`
|
||||
|
||||
### Steps
|
||||
1. Launch Llamafile:
|
||||
```bash
|
||||
./mistral-7b-instruct.Q5_K_M.llamafile --port 8080
|
||||
```
|
||||
2. Run translation:
|
||||
```bash
|
||||
python scripts/translate_site.py --langs es,fr
|
||||
```
|
||||
3. Commit translated content:
|
||||
```bash
|
||||
git add es/ fr/
|
||||
```
|
||||
|
||||
> Translated files are saved to `/es/`, `/fr/`, etc., and served alongside English content.
|
||||
|
||||
|
||||
#### 📁 `example/content/scripts/glossary_es.json`
|
||||
→ Language-specific scientific term mappings
|
||||
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"genetic drift": "deriva genética",
|
||||
"natural selection": "selección natural",
|
||||
"punctuated equilibrium": "equilibrio puntuado",
|
||||
"allele": "alelo"
|
||||
}
|
||||
|
|
@ -0,0 +1,119 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Offline multilingual translation for evo-edu.org using Llamafile.
|
||||
Requires: BeautifulSoup4, requests
|
||||
Install with: pip install beautifulsoup4 requests
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import argparse
|
||||
import time
|
||||
from pathlib import Path
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
import requests
|
||||
|
||||
# --- Configuration ---
# Llamafile exposes llama.cpp's /completion endpoint; port 8080 matches the
# launch command documented in the project README.
MODEL_API_URL = "http://localhost:8080/completion"
# Supported target languages: code (used for directories / glossary filenames)
# -> human-readable name (interpolated into the translation prompt).
LANGUAGES = {
    "es": "Spanish",
    "fr": "French",
    "pt": "Portuguese",
    "de": "German"
}
|
||||
|
||||
def translate_text(text, target_lang_name, glossary=None):
    """Translate a block of English text via the local Llamafile completion API.

    Args:
        text: English source text. Whitespace-only input is returned unchanged
            (nothing to translate, and it avoids a pointless model call).
        target_lang_name: Human-readable language name (e.g. "Spanish")
            interpolated into the prompt.
        glossary: Optional mapping of English term -> preferred translation,
            injected into the prompt so domain terms stay consistent.

    Returns:
        The translated text, or the original text unchanged if the request
        fails — a best-effort fallback so a long run never loses content.
    """
    if not text.strip():
        return text

    glossary_text = ""
    if glossary:
        glossary_text = "Use these translations:\n" + "\n".join(f"'{k}' → '{v}'" for k, v in glossary.items()) + "\n\n"

    prompt = f"""You are a scientific translator. Translate the following English text into {target_lang_name}.
Preserve technical terms like "genetic drift" or "natural selection" unless a standard translation exists.
Maintain paragraph structure. Do not add commentary.

{glossary_text}Text:
"{text}"

Translation:"""

    try:
        response = requests.post(MODEL_API_URL, json={
            "prompt": prompt,
            "temperature": 0.1,  # near-deterministic output for faithful translation
            "stop": ["\n\n", "Text:", "Translation:"],
            "n_predict": 1024,
        }, timeout=120)
        response.raise_for_status()
        return response.json()["content"].strip()
    except Exception as e:
        # Fix: the warning marker was mojibake ("âš ï¸" — UTF-8 bytes of ⚠️
        # mis-decoded as Latin-1); restore the intended character.
        print(f" ⚠️ Translation failed: {e}")
        return text  # fallback to original
|
||||
|
||||
def extract_translatable_text(soup):
    """Collect the visible text nodes of *soup* for translation.

    Skips whitespace-only strings and anything nested in <script>/<style>
    tags. The returned NavigableString objects remain attached to the
    parse tree, so callers can replace them in place.
    """
    skip_parents = ('script', 'style')
    return [
        node
        for node in soup.descendants
        if isinstance(node, NavigableString)
        and node.parent.name not in skip_parents
        and node.strip()
    ]
|
||||
|
||||
def translate_html_file(src_path, dest_path, target_lang_code):
    """Translate one HTML file node-by-node and write it to *dest_path*.

    Args:
        src_path: English source HTML file (str or Path).
        dest_path: Destination file (str or Path); parent directories are
            created as needed.
        target_lang_code: Key into LANGUAGES (e.g. "es"); also selects the
            optional glossary_<code>.json next to this script.
    """
    # Coerce so str callers work too; pathlib methods are used below.
    src_path = Path(src_path)
    dest_path = Path(dest_path)
    print(f"Translating {src_path} → {dest_path}")
    html = src_path.read_text(encoding='utf-8')

    soup = BeautifulSoup(html, 'html.parser')
    text_nodes = extract_translatable_text(soup)

    # Optional: load a per-language glossary shipped next to this script.
    glossary = {}
    glossary_path = Path(__file__).parent / f"glossary_{target_lang_code}.json"
    if glossary_path.exists():
        # Fix: read explicitly as UTF-8 — glossaries contain accented
        # characters ("deriva genética") and the platform default
        # encoding is not guaranteed to be UTF-8.
        with open(glossary_path, 'r', encoding='utf-8') as f:
            glossary = json.load(f)

    # Translate each visible text node in place.
    for node in text_nodes:
        original = str(node)
        translated = translate_text(original, LANGUAGES[target_lang_code], glossary)
        node.replace_with(translated)
        time.sleep(0.1)  # be gentle on CPU

    # Save translated HTML, creating the language directory if needed.
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    dest_path.write_text(str(soup), encoding='utf-8')
|
||||
|
||||
def main():
    """CLI entry point: mirror the English site tree into each requested language."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--langs", required=True, help="Comma-separated language codes (e.g., es,fr)")
    parser.add_argument("--src", default="content/en", help="Source directory (English)")
    parser.add_argument("--dest", default="content", help="Base destination directory")
    args = parser.parse_args()

    # Fix: tolerate whitespace and empty items ("es, fr", trailing comma) —
    # the raw split() treated " fr" and "" as unsupported languages.
    lang_codes = [code.strip() for code in args.langs.split(',') if code.strip()]
    src_base = Path(args.src)
    dest_base = Path(args.dest)

    for lang_code in lang_codes:
        if lang_code not in LANGUAGES:
            print(f"Unsupported language: {lang_code}")
            continue

        print(f"\n=== Translating to {LANGUAGES[lang_code]} ({lang_code}) ===")
        # Mirror the English tree under <dest>/<lang>/, file by file.
        for html_file in src_base.rglob("*.html"):
            rel_path = html_file.relative_to(src_base)
            dest_file = dest_base / lang_code / rel_path
            translate_html_file(html_file, dest_file, lang_code)

    print("\n✅ Translation complete.")
|
||||
|
||||
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in New Issue