Mistral-Llm-Translator
Sat 17 May 2025
# https://chatgpt.com/share/6767b5fc-cc44-8002-a0f1-68f486d27d9a
import pyutil as pyu
# Notebook cell: ask the pyutil helper for the local Python environment; the
# returned string (conda env name and interpreter version) is echoed below.
pyu.get_local_pyinfo()
'conda env: ml311; pyv: 3.11.10 (main, Oct 3 2024, 07:29:13) [GCC 11.2.0]'
# Install step for sacremoses, left commented out:
# !pip install sacremoses
# Print the pinned versions of the packages this notebook depends on
# (judging by the output below, pyu.ps2 emits one "name==version" per line).
print(pyu.ps2("transformers torch sacremoses"))
transformers==4.47.0
torch==2.5.1
sacremoses==0.0.53
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Checkpoint for this demo: the Helsinki-NLP OPUS-MT English -> Finnish pair.
# (Swap in another seq2seq checkpoint name here to try a different model.)
model_name = "Helsinki-NLP/opus-mt-en-fi"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# The sentence we want translated.
english_text = "Hello, how are you?"

# Turn the sentence into a PyTorch tensor of token ids.
inputs = tokenizer.encode(
    english_text,
    truncation=True,
    return_tensors="pt",
)

# Decode with beam search (4 beams), limited to 40 tokens of output.
outputs = model.generate(
    inputs,
    num_beams=4,
    max_length=40,
    early_stopping=True,
)

# Only one sequence is returned; convert it back to plain text.
finnish_translation = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Show the source sentence next to its translation.
print(f"Original: {english_text}")
print(f"Translated: {finnish_translation}")
Original: Hello, how are you?
Translated: Hei, mitä kuuluu?
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
class CustomTranslatorWithLLM():
    """Translate English text using a Helsinki-NLP OPUS-MT checkpoint.

    The checkpoint is chosen by target-language code, so one instance handles
    exactly one English -> <language_code> direction.
    """

    def __init__(self, language_code):
        """Load the tokenizer and model for ``opus-mt-en-<language_code>``.

        Args:
            language_code: Suffix of the checkpoint name identifying the
                target language, e.g. ``"fi"`` selects
                ``Helsinki-NLP/opus-mt-en-fi``.
        """
        # The checkpoint name is derived from the target language code.
        model_name = f"Helsinki-NLP/opus-mt-en-{language_code}"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    def translate(self, text, max_length=40, num_beams=4):
        """Translate a single English string.

        Args:
            text: The English text to translate.
            max_length: Upper bound on the generated sequence length, passed
                to ``generate``. The default of 40 matches the previous
                hard-coded value; raise it for longer inputs, otherwise the
                translation may be cut short.
            num_beams: Beam-search width passed to ``generate``.

        Returns:
            The decoded translation with special tokens removed.
        """
        # Encode to a tensor of token ids; over-long input is truncated.
        inputs = self.tokenizer.encode(text, return_tensors="pt", truncation=True)
        outputs = self.model.generate(
            inputs,
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True,
        )
        # generate() returns a batch; there is only one sequence in it.
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def separate_index(self, line):
        """Split a leading ``"<number>. "`` index off a line.

        Args:
            line: A line such as ``"3. What is your name?"``.

        Returns:
            A ``(number, text)`` tuple. ``number`` is the index as a string
            when the line starts with a decimal number followed by ``". "``;
            otherwise ``number`` is ``None`` and ``text`` is the whole line.
        """
        prefix, separator, remainder = line.partition(". ")
        # Only treat the prefix as an index when it really is a number, so a
        # sentence boundary ("Hello. How are you?") is not mistaken for one.
        if separator and prefix.strip().isdecimal():
            return prefix, remainder
        return None, line
# Build an English -> Finnish translator; constructing it loads the
# "Helsinki-NLP/opus-mt-en-fi" tokenizer and model.
translator = CustomTranslatorWithLLM("fi")
/home/rajaraman/miniconda3/envs/ml311/lib/python3.11/site-packages/transformers/models/marian/tokenization_marian.py:175: UserWarning: Recommended: pip install sacremoses.
warnings.warn("Recommended: pip install sacremoses.")
# Quick check on a single sentence; the notebook echoes the returned string.
translator.translate("Where can I get a taxi?")
'Mistä saan taksin?'
# Sample input: numbered English sentences, one per line, each in the
# "<number>. <text>" form. Presumably meant to be split line by line and fed
# through separate_index()/translate() -- that usage is not shown here.
contents = """
1. Hello, how are you?
2. I am doing well, thank you.
3. What is your name?
4. My name is Sarah.
"""
Score: 10
Category: mythraki