Initial scripts/ files
This commit is contained in:
parent
b59a3884a3
commit
6648269557
|
|
@ -0,0 +1,31 @@
|
|||
# SciSiteForge Scripts
|
||||
|
||||
## 🛠️ Build & Translate
|
||||
|
||||
This site framework supports offline multilingual translation using Llamafile.
|
||||
|
||||
### Prerequisites
|
||||
- Download a multilingual GGUF model (e.g., `mistral-7b-instruct.Q5_K_M.gguf`)
|
||||
- Install [Llamafile](https://github.com/Mozilla-Ocho/llamafile)
|
||||
- Python 3 with `requests` and `beautifulsoup4`
|
||||
|
||||
### Steps
|
||||
1. Launch Llamafile:
|
||||
```bash
|
||||
./mistral-7b-instruct.Q5_K_M.llamafile --port 8080
|
||||
```
|
||||
2. Run translation:
|
||||
```bash
|
||||
python scripts/translate_site.py --langs es,fr
|
||||
```
|
||||
3. Commit translated content:
|
||||
```bash
|
||||
git add es/ fr/
|
||||
```
|
||||
|
||||
> Translated files are saved to `/es/`, `/fr/`, etc., and served alongside English content.
|
||||
|
||||
|
||||
#### 📁 `example/content/scripts/glossary_es.json`
|
||||
→ Language-specific scientific term mappings
|
||||
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"genetic drift": "deriva genética",
|
||||
"natural selection": "selección natural",
|
||||
"punctuated equilibrium": "equilibrio puntuado",
|
||||
"allele": "alelo"
|
||||
}
|
||||
|
|
@ -0,0 +1,119 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Offline multilingual translation for evo-edu.org using Llamafile.
|
||||
Requires: BeautifulSoup4, requests
|
||||
Install with: pip install beautifulsoup4 requests
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import argparse
|
||||
import time
|
||||
from pathlib import Path
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
import requests
|
||||
|
||||
# --- Configuration ---
# Llamafile exposes llama.cpp's /completion endpoint; port 8080 matches the
# launch command documented in the project README.
MODEL_API_URL = "http://localhost:8080/completion"
# Supported target languages: code (used for directories / glossary filenames)
# -> human-readable name (interpolated into the translation prompt).
LANGUAGES = {
    "es": "Spanish",
    "fr": "French",
    "pt": "Portuguese",
    "de": "German"
}
|
||||
|
||||
def translate_text(text, target_lang_name, glossary=None):
    """Translate a block of English text via the local Llamafile completion API.

    Args:
        text: English source text. Whitespace-only input is returned unchanged
            (nothing to translate, and it avoids a pointless model call).
        target_lang_name: Human-readable language name (e.g. "Spanish")
            interpolated into the prompt.
        glossary: Optional mapping of English term -> preferred translation,
            injected into the prompt so domain terms stay consistent.

    Returns:
        The translated text, or the original text unchanged if the request
        fails — a best-effort fallback so a long run never loses content.
    """
    if not text.strip():
        return text

    glossary_text = ""
    if glossary:
        glossary_text = "Use these translations:\n" + "\n".join(f"'{k}' → '{v}'" for k, v in glossary.items()) + "\n\n"

    prompt = f"""You are a scientific translator. Translate the following English text into {target_lang_name}.
Preserve technical terms like "genetic drift" or "natural selection" unless a standard translation exists.
Maintain paragraph structure. Do not add commentary.

{glossary_text}Text:
"{text}"

Translation:"""

    try:
        response = requests.post(MODEL_API_URL, json={
            "prompt": prompt,
            "temperature": 0.1,  # near-deterministic output for faithful translation
            "stop": ["\n\n", "Text:", "Translation:"],
            "n_predict": 1024,
        }, timeout=120)
        response.raise_for_status()
        return response.json()["content"].strip()
    except Exception as e:
        # Fix: the warning marker was mojibake ("âš ï¸" — UTF-8 bytes of ⚠️
        # mis-decoded as Latin-1); restore the intended character.
        print(f" ⚠️ Translation failed: {e}")
        return text  # fallback to original
|
||||
|
||||
def extract_translatable_text(soup):
    """Collect the visible text nodes of *soup* for translation.

    Skips whitespace-only strings and anything nested in <script>/<style>
    tags. The returned NavigableString objects remain attached to the
    parse tree, so callers can replace them in place.
    """
    skip_parents = ('script', 'style')
    return [
        node
        for node in soup.descendants
        if isinstance(node, NavigableString)
        and node.parent.name not in skip_parents
        and node.strip()
    ]
|
||||
|
||||
def translate_html_file(src_path, dest_path, target_lang_code):
    """Translate one HTML file node-by-node and write it to *dest_path*.

    Args:
        src_path: English source HTML file (str or Path).
        dest_path: Destination file (str or Path); parent directories are
            created as needed.
        target_lang_code: Key into LANGUAGES (e.g. "es"); also selects the
            optional glossary_<code>.json next to this script.
    """
    # Coerce so str callers work too; pathlib methods are used below.
    src_path = Path(src_path)
    dest_path = Path(dest_path)
    print(f"Translating {src_path} → {dest_path}")
    html = src_path.read_text(encoding='utf-8')

    soup = BeautifulSoup(html, 'html.parser')
    text_nodes = extract_translatable_text(soup)

    # Optional: load a per-language glossary shipped next to this script.
    glossary = {}
    glossary_path = Path(__file__).parent / f"glossary_{target_lang_code}.json"
    if glossary_path.exists():
        # Fix: read explicitly as UTF-8 — glossaries contain accented
        # characters ("deriva genética") and the platform default
        # encoding is not guaranteed to be UTF-8.
        with open(glossary_path, 'r', encoding='utf-8') as f:
            glossary = json.load(f)

    # Translate each visible text node in place.
    for node in text_nodes:
        original = str(node)
        translated = translate_text(original, LANGUAGES[target_lang_code], glossary)
        node.replace_with(translated)
        time.sleep(0.1)  # be gentle on CPU

    # Save translated HTML, creating the language directory if needed.
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    dest_path.write_text(str(soup), encoding='utf-8')
|
||||
|
||||
def main():
    """CLI entry point: mirror the English site tree into each requested language."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--langs", required=True, help="Comma-separated language codes (e.g., es,fr)")
    parser.add_argument("--src", default="content/en", help="Source directory (English)")
    parser.add_argument("--dest", default="content", help="Base destination directory")
    args = parser.parse_args()

    # Fix: tolerate whitespace and empty items ("es, fr", trailing comma) —
    # the raw split() treated " fr" and "" as unsupported languages.
    lang_codes = [code.strip() for code in args.langs.split(',') if code.strip()]
    src_base = Path(args.src)
    dest_base = Path(args.dest)

    for lang_code in lang_codes:
        if lang_code not in LANGUAGES:
            print(f"Unsupported language: {lang_code}")
            continue

        print(f"\n=== Translating to {LANGUAGES[lang_code]} ({lang_code}) ===")
        # Mirror the English tree under <dest>/<lang>/, file by file.
        for html_file in src_base.rglob("*.html"):
            rel_path = html_file.relative_to(src_base)
            dest_file = dest_base / lang_code / rel_path
            translate_html_file(html_file, dest_file, lang_code)

    print("\n✅ Translation complete.")
|
||||
|
||||
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in New Issue