027_国际化与本地化
概述
本文档介绍如何构建支持多语言、多地区的 Claude 应用,包括语言检测、自动翻译、文化适配等功能。
语言检测与处理
1. 语言检测系统
import re
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

from anthropic import Anthropic
from langdetect import detect, detect_langs
class LanguageDetector:
    """Detect the language of a text from two independent signals.

    The first signal is a character-class heuristic (``_detect_by_patterns``);
    the second is the ``langdetect`` library. ``_combine_results`` merges them
    into a single ``{'language': <locale code>, 'confidence': <float>}`` dict.
    """

    def __init__(self):
        # Map langdetect's short codes to the locale codes used in this module.
        self.language_map = {
            'zh': 'zh-CN',  # Simplified Chinese
            'zh-cn': 'zh-CN',
            'zh-tw': 'zh-TW',  # Traditional Chinese
            'en': 'en-US',
            'ja': 'ja-JP',
            'ko': 'ko-KR',
            'fr': 'fr-FR',
            'de': 'de-DE',
            'es': 'es-ES',
            'ru': 'ru-RU',
            'ar': 'ar-SA',
            'hi': 'hi-IN'
        }
        # Per-locale regex character classes counted by the script heuristic.
        self.language_patterns = {
            'zh-CN': [
                r'[\u4e00-\u9fff]',  # CJK ideographs
                r'[的是了在有个为上也把]'  # frequent characters, simplified forms
            ],
            'zh-TW': [
                r'[\u4e00-\u9fff]',
                r'[的是了在有個為上也把]'  # frequent characters, traditional forms
            ],
            'ja-JP': [
                r'[\u3040-\u309f]',  # hiragana
                r'[\u30a0-\u30ff]',  # katakana
                r'[\u4e00-\u9faf]'  # kanji
            ],
            'ko-KR': [
                r'[\uac00-\ud7af]'  # Hangul syllables
            ],
            'ar-SA': [
                r'[\u0600-\u06ff]'  # Arabic script
            ],
            'hi-IN': [
                r'[\u0900-\u097f]'  # Devanagari script
            ]
        }

    def detect_language(self, text: str) -> Dict[str, Any]:
        """Detect the language of ``text``.

        Args:
            text: The text to classify.

        Returns:
            A dict with ``'language'`` (locale code such as ``'zh-CN'``, or
            ``'unknown'`` for empty/blank input) and ``'confidence'`` (float).
        """
        if not text or not text.strip():
            return {'language': 'unknown', 'confidence': 0.0}

        detection_results = {}

        # 1. Script-based heuristic.
        pattern_result = self._detect_by_patterns(text)
        if pattern_result:
            detection_results['pattern'] = pattern_result

        # 2. langdetect. A single detect_langs() call supplies both the
        #    language and its probability; calling detect() separately could
        #    pair a language from one run with a probability from another,
        #    because langdetect is not deterministic by default.
        try:
            candidates = detect_langs(text)
        except Exception as e:
            # Best effort: fall back to whatever the heuristic produced.
            print(f"语言检测失败: {e}")
        else:
            if candidates:
                best = candidates[0]
                detection_results['langdetect'] = {
                    'language': self.language_map.get(best.lang, best.lang),
                    'confidence': best.prob
                }

        # 3. Merge the signals.
        return self._combine_results(detection_results)

    def _detect_by_patterns(self, text: str) -> Optional[Dict[str, Any]]:
        """Score each locale by how many of its pattern matches occur in ``text``.

        Returns:
            ``{'language', 'confidence'}`` for the best-scoring locale, or
            ``None`` when no pattern matches at all.
        """
        pattern_scores = {}
        for lang, patterns in self.language_patterns.items():
            hits = sum(len(re.findall(pattern, text)) for pattern in patterns)
            if hits > 0:
                # hits > 0 implies text is non-empty, so the division is safe.
                pattern_scores[lang] = hits / len(text)

        if not pattern_scores:
            return None

        best_lang = max(pattern_scores, key=pattern_scores.get)
        return {
            'language': best_lang,
            # Match density is doubled and capped at 1.0.
            'confidence': min(pattern_scores[best_lang] * 2, 1.0)
        }

    def _combine_results(self, results: Dict) -> Dict[str, Any]:
        """Pick the final answer from the collected detection results.

        Precedence: a pattern result with confidence above 0.7, then the
        langdetect result, then any pattern result, then an ``'en-US'``
        default with confidence 0.0.
        """
        if not results:
            return {'language': 'en-US', 'confidence': 0.0}

        pattern = results.get('pattern')
        if pattern is not None and pattern['confidence'] > 0.7:
            return pattern
        if 'langdetect' in results:
            return results['langdetect']
        if pattern is not None:
            return pattern
        return {'language': 'en-US', 'confidence': 0.0}
# Multilingual text processor
class MultilingualTextProcessor:
    """Detect the language of incoming text and optionally translate it."""

    def __init__(self, client: Anthropic):
        self.detector = LanguageDetector()
        self.client = client
        # Locale code -> display name of the language (written in that language).
        self.supported_languages = {
            'zh-CN': '中文 (简体)',
            'zh-TW': '中文 (繁體)',
            'en-US': 'English',
            'ja-JP': '日本語',
            'ko-KR': '한국어',
            'fr-FR': 'Français',
            'de-DE': 'Deutsch',
            'es-ES': 'Español',
            'ru-RU': 'Русский',
            'ar-SA': 'العربية',
            'hi-IN': 'हिन्दी'
        }

    def process_multilingual_input(
        self,
        text: str,
        target_language: Optional[str] = None
    ) -> Dict[str, str]:
        """Detect the language of ``text`` and translate it when requested.

        A translation is produced only when ``target_language`` is given,
        differs from the detected language, and is a supported locale. In that
        case ``'translated_text'`` and ``'target_language'`` are added to the
        returned dict.
        """
        detected = self.detector.detect_language(text)
        detected_code = detected['language']

        outcome = {
            'original_text': text,
            'detected_language': detected_code,
            'confidence': detected['confidence'],
            'processed_text': text
        }

        needs_translation = (
            target_language
            and target_language != detected_code
            and target_language in self.supported_languages
        )
        if not needs_translation:
            return outcome

        outcome['translated_text'] = self.translate_text(
            text, detected_code, target_language
        )
        outcome['target_language'] = target_language
        return outcome

    def translate_text(
        self,
        text: str,
        source_lang: str,
        target_lang: str
    ) -> str:
        """Ask the model to translate ``text`` and return the stripped reply.

        Locale codes without a display name are used verbatim in the prompt.
        """
        names = self.supported_languages
        source_name = names.get(source_lang, source_lang)
        target_name = names.get(target_lang, target_lang)

        prompt = f"""请将以下{source_name}文本翻译成{target_name}:
原文:{text}
翻译(请只返回翻译结果,不要包含其他说明):"""

        user_message = {"role": "user", "content": prompt}
        reply = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=2000,
            messages=[user_message]
        )
        first_block = reply.content[0]
        return first_block.text.strip()
2. 智能语言适配
class LanguageAdapter:
    """Generate model responses tailored to a locale's language and culture."""

    def __init__(self, client: Anthropic):
        self.client = client
        # Per-locale settings: greeting, system prompt, strftime date format,
        # number-format tag and the key used to look up a cultural hint.
        self.language_configs = {
            'zh-CN': {
                'greeting': '您好',
                'system_prompt': '你是一个有用的AI助手,请用简体中文回答。',
                'date_format': '%Y年%m月%d日',
                'number_format': 'chinese_simplified',
                'cultural_context': 'chinese_mainland'
            },
            'zh-TW': {
                'greeting': '您好',
                'system_prompt': '你是一個有用的AI助手,請用繁體中文回答。',
                'date_format': '%Y年%m月%d日',
                'number_format': 'chinese_traditional',
                'cultural_context': 'taiwan'
            },
            'en-US': {
                'greeting': 'Hello',
                'system_prompt': 'You are a helpful AI assistant. Please respond in English.',
                'date_format': '%B %d, %Y',
                'number_format': 'us',
                'cultural_context': 'american'
            },
            'ja-JP': {
                'greeting': 'こんにちは',
                'system_prompt': 'あなたは役に立つAIアシスタントです。日本語で回答してください。',
                'date_format': '%Y年%m月%d日',
                'number_format': 'japanese',
                'cultural_context': 'japanese'
            },
            'ko-KR': {
                'greeting': '안녕하세요',
                'system_prompt': '당신은 도움이 되는 AI 어시스턴트입니다. 한국어로 답변해주세요.',
                'date_format': '%Y년 %m월 %d일',
                'number_format': 'korean',
                'cultural_context': 'korean'
            },
            'ar-SA': {
                'greeting': 'السلام عليكم',
                'system_prompt': 'أنت مساعد ذكي مفيد. يرجى الرد باللغة العربية.',
                'date_format': '%d/%m/%Y',
                'number_format': 'arabic',
                'cultural_context': 'arabic',
                'rtl': True  # right-to-left script
            }
        }

    def adapt_response(
        self,
        user_input: str,
        detected_language: str,
        target_language: Optional[str] = None
    ) -> str:
        """Answer ``user_input`` in the requested (or detected) language.

        ``target_language`` takes precedence over ``detected_language``;
        locales without a configuration fall back to ``'en-US'``.
        """
        chosen = target_language or detected_language
        config = self.language_configs.get(chosen)
        if config is None:
            config = self.language_configs['en-US']  # default language

        # System prompt, optionally followed by a cultural hint.
        prompt_sections = [config['system_prompt']]
        hint = self._get_cultural_context(config['cultural_context'])
        if hint:
            prompt_sections.append(hint)

        reply = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=2000,
            system="\n\n".join(prompt_sections),
            messages=[{
                "role": "user",
                "content": user_input
            }]
        )

        # Post-process: locale-specific formatting of dates etc.
        return self._format_response(reply.content[0].text, config)

    def _get_cultural_context(self, culture: str) -> str:
        """Return the cultural hint sentence for ``culture`` ('' if unknown)."""
        hints = {
            'chinese_mainland': '请注意中国大陆的文化背景和社会环境。',
            'taiwan': '請注意台灣的文化背景和社會環境。',
            'american': 'Please consider American cultural context and social norms.',
            'japanese': '日本の文化的背景と社会的文脈を考慮してください。',
            'korean': '한국의 문화적 배경과 사회적 맥락을 고려해주세요.',
            'arabic': 'يرجى مراعاة السياق الثقافي والاجتماعي العربي.'
        }
        if culture in hints:
            return hints[culture]
        return ''

    def _format_response(
        self,
        text: str,
        config: Dict[str, str]
    ) -> str:
        """Apply locale formatting to ``text`` (simplified example).

        Currently this only replaces the standalone word "today"
        (case-insensitive) with the current date in ``config['date_format']``.
        """
        date_pattern = config['date_format']
        if not date_pattern or 'today' not in text.lower():
            return text

        from datetime import datetime
        rendered_date = datetime.now().strftime(date_pattern)
        # Further formatting steps would go here...
        return re.sub(
            r'\btoday\b',
            rendered_date,
            text,
            flags=re.IGNORECASE
        )
# Localization manager
class LocalizationManager:
    """Store per-locale UI strings and resolve them with a default fallback."""

    def __init__(self):
        self.default_locale = 'en-US'
        self.localizations = {}

    def load_localizations(self, locale_data: Dict[str, Dict[str, str]]):
        """Merge ``locale_data`` (locale -> {key: text}) into the store.

        A locale that is already present is replaced as a whole.
        """
        self.localizations.update(locale_data)

    def get_text(
        self,
        key: str,
        locale: str,
        **kwargs
    ) -> str:
        """Return the text for ``key`` in ``locale``.

        Lookup order: the requested locale, then the default locale; if
        neither has the key, ``key`` itself is returned. When keyword
        arguments are given they are substituted with ``str.format``.
        """
        for candidate in (locale, self.default_locale):
            if candidate not in self.localizations:
                continue
            table = self.localizations[candidate]
            if key not in table:
                continue
            template = table[key]
            if kwargs:
                return template.format(**kwargs)
            return template
        # Nothing found anywhere: hand back the key.
        return key

    def get_supported_locales(self) -> List[str]:
        """Return the locale codes that currently have data loaded."""
        return [code for code in self.localizations]
# Sample localization data: locale code -> {message key: localized text}.
# Values may contain str.format placeholders such as {error}.
LOCALIZATION_DATA = {
'en-US': {
'welcome': 'Welcome!',
'error_occurred': 'An error occurred: {error}',
'processing': 'Processing your request...',
'goodbye': 'Goodbye! Have a great day!',
'help_text': 'How can I help you today?'
},
'zh-CN': {
'welcome': '欢迎!',
'error_occurred': '发生错误:{error}',
'processing': '正在处理您的请求...',
'goodbye': '再见!祝您有美好的一天!',
'help_text': '今天我可以为您做些什么?'
},
'zh-TW': {
'welcome': '歡迎!',
'error_occurred': '發生錯誤:{error}',
'processing': '正在處理您的請求...',
'goodbye': '再見!祝您有美好的一天!',
'help_text': '今天我可以為您做些什麼?'
},
'ja-JP': {
'welcome': 'いらっしゃいませ!',
'error_occurred': 'エラーが発生しました:{error}',
'processing': 'リクエストを処理中...',
'goodbye': 'さようなら!良い一日をお過ごしください!',
'help_text': '今日はどのようにお手伝いできますか?'
}
}
自动翻译系统
1. 上下文感知翻译
class ContextAwareTranslator:
def __init__(self, client: Anthropic):
self.client = client
self.translation_cache = {}
self.context_memory = {}
def translate_with_context(
self,
text: str,
source_lang: str,
target_lang: str,
context: Optional[str] = None,
domain: Optional[str] = None
) -> Dict[str, str]:
"""带上下文的翻译"""
# 检查缓存
cache_key = f"{source_lang}:{target_lang}:{hash(text)}"
if cache_key in self.translation_cache:
return self.translation_cache[cache_key]
# 构建翻译提示
prompt_parts = []
# 添加领域信息
if domain:
domain_context = self._get_domain_context(domain, target_lang)
prompt_parts.append(domain_context)
# 添加上下文信息
if context:
prompt_parts.append(f"上下文:{context}")
# 添加翻译指令
translation_instruction = self._build_translation_instruction(
source_lang,
target_lang,
domain
)
prompt_parts.append(translation_instruction)
# 添加待翻译文本
prompt_parts.append(f"待翻译文本:{text}")
prompt_parts.append("翻译结果:")
prompt = "\n\n".join(prompt_parts)
# 调用API
response = self.client.messages.create(
model="claude-3-5-sonnet-20241022",
max_tokens=2000,
temperature=0.3, # 降低温度以获得更一致的翻译
messages=[{
"role": "user",
"content": prompt
}]
)
translation = response.content[0].text.strip()
# 后处理
cleaned_translation = self._post_process_translation(
translation,
target_lang
)
result = {
'source_text': text,
'target_text': cleaned_translation,
'source_language': source_lang,
'target_language': target_lang,
'domain': domain,
'context': context
}
# 缓存结果
self.translation_cache[cache_key] = result
return result
def _build_translation_instruction(
self,
source_lang: str,
target_lang: str,
domain: Optional[str]
) -> str:
"""构建翻译指令"""
lang_names = {
'zh-CN': '简体中文',
'zh-TW': '繁体中文',
'en-US': '英语',
'ja-JP': '日语',
'ko-KR': '韩语',
'fr-FR': '法语',
'de-DE': '德语',
'es-ES': '西班牙语'
}
source_name = lang_names.get(source_lang, source_lang)
target_name = lang_names.get(target_lang, target_lang)
instruction = f"请将以下{source_name}文本准确翻译成{target_name}。"
# 添加特定要求
requirements = [
"保持原文的语气和风格",
"确保术语的准确性",
"保持句子的自然流畅"
]
if domain:
domain_requirements = self._get_domain_requirements(domain)
requirements.extend(domain_requirements)
instruction += "翻译要求:\n" + "\n".join(f"- {req}" for req in requirements)
return instruction
def _get_domain_context(self, domain: str, target_lang: str) -> str:
"""获取领域上下文"""
domain_contexts = {
'technical': {
'zh-CN': '这是技术文档翻译,请注意保持技术术语的准确性。',
'en-US': 'This is technical documentation translation. Please maintain accuracy of technical terms.'
},
'medical': {
'zh-CN': '这是医学文档翻译,请确保医学术语的准确性。',
'en-US': 'This is medical documentation translation. Please ensure accuracy of medical terminology.'
},
'legal': {
'zh-CN': '这是法律文档翻译,请保持法律术语的精确性。',
'en-US': 'This is legal documentation translation. Please maintain precision of legal terms.'
},
'business': {
'zh-CN': '这是商务文档翻译,请使用正式的商务语言。',
'en-US': 'This is business documentation translation. Please use formal business language.'
}
}
return domain_contexts.get(domain, {}).get(target_lang, '')
def _get_domain_requirements(self, domain: str) -> List[str]:
"""获取领域特定要求"""
requirements = {
'technical': [
'保持技术术语不变或使用标准翻译',
'保留代码片段和命令不翻译'
],
'medical': [
'使用标准医学术语',
'保持剂量和医学数据的准确性'
],
'legal': [
'使用准确的法律术语',
'保持条款结构的清晰性'
],
'business': [
'使用正式商务语言',
'保持数字和日期格式的本地化'
]
}
return requirements.get(domain, [])
def _post_process_translation(
self,
translation: str,
target_lang: str
) -> str:
"""后处理翻译结果"""
cleaned = translation
# 移除多余的引号或说明
cleaned = re.sub(r'^["\']|["\']$', '', cleaned)
cleaned = re.sub(r'^翻译[::]?\s*', '', cleaned)
cleaned = re.sub(r'^Translation[:]?\s*', '', cleaned, flags=re.IGNORECASE)
# 处理特定语言的格式
if target_lang in ['zh-CN', 'zh-TW']:
# 中文标点符号规范化
cleaned = cleaned.replace('。 ', '。')
cleaned = cleaned.replace(', ', ',')
return cleaned.strip()
# Batch translation manager
class BatchTranslationManager:
    """Queue translation tasks and process them in priority order."""

    def __init__(self, translator: ContextAwareTranslator):
        self.translator = translator
        self.translation_queue = []
        self.results = {}

    def add_translation_task(
        self,
        task_id: str,
        text: str,
        source_lang: str,
        target_lang: str,
        context: Optional[str] = None,
        domain: Optional[str] = None,
        priority: int = 1
    ):
        """Enqueue a translation task.

        Args:
            task_id: Identifier under which the result is stored.
            text: Text to translate.
            source_lang: Locale code of ``text``.
            target_lang: Locale code to translate into.
            context: Optional context forwarded to the translator.
            domain: Optional domain forwarded to the translator.
            priority: Larger values are processed earlier.
        """
        task = {
            'id': task_id,
            'text': text,
            'source_lang': source_lang,
            'target_lang': target_lang,
            'context': context,
            'domain': domain,
            'priority': priority,
            'created_at': datetime.now()
        }
        self.translation_queue.append(task)
        # Highest priority first; within one priority the oldest task first.
        # (Sorting (priority, created_at) with reverse=True would also reverse
        # the time order and serve the newest task of a priority first.)
        self.translation_queue.sort(
            key=lambda queued: (-queued['priority'], queued['created_at'])
        )

    def process_batch(self, batch_size: int = 10) -> Dict[str, Any]:
        """Translate up to ``batch_size`` queued tasks.

        A task that raises is recorded with ``status='error'`` and does not
        stop the batch.

        Returns:
            A dict with ``processed`` (number of successes),
            ``total_in_batch``, ``remaining_in_queue`` and ``results``
            (task id -> outcome record).
        """
        if not self.translation_queue:
            # Same shape as the normal return value.
            return {
                'processed': 0,
                'total_in_batch': 0,
                'remaining_in_queue': 0,
                'results': {}
            }

        tasks_to_process = self.translation_queue[:batch_size]
        self.translation_queue = self.translation_queue[batch_size:]

        batch_results = {}
        processed_count = 0
        for task in tasks_to_process:
            try:
                result = self.translator.translate_with_context(
                    text=task['text'],
                    source_lang=task['source_lang'],
                    target_lang=task['target_lang'],
                    context=task['context'],
                    domain=task['domain']
                )
            except Exception as e:
                batch_results[task['id']] = {
                    'status': 'error',
                    'error': str(e),
                    'processed_at': datetime.now()
                }
            else:
                batch_results[task['id']] = {
                    'status': 'success',
                    'result': result,
                    'processed_at': datetime.now()
                }
                processed_count += 1

        self.results.update(batch_results)
        return {
            'processed': processed_count,
            'total_in_batch': len(tasks_to_process),
            'remaining_in_queue': len(self.translation_queue),
            'results': batch_results
        }
2. 翻译质量评估
class TranslationQualityAssessor:
    """Ask the model to grade a translation and turn the reply into scores."""

    # Metric key -> the Chinese label the prompt asks the model to print.
    _METRIC_LABELS = {
        'accuracy': '准确性',
        'fluency': '流畅性',
        'completeness': '完整性',
        'consistency': '一致性',
        'cultural_appropriateness': '文化适应性'
    }

    def __init__(self, client: Anthropic):
        self.client = client
        self.quality_metrics = [
            'accuracy',  # accuracy
            'fluency',  # fluency
            'completeness',  # completeness
            'consistency',  # consistency
            'cultural_appropriateness'  # cultural appropriateness
        ]

    def assess_translation(
        self,
        source_text: str,
        translated_text: str,
        source_lang: str,
        target_lang: str,
        domain: Optional[str] = None
    ) -> Dict[str, Any]:
        """Grade ``translated_text`` against ``source_text``.

        Returns:
            A dict with ``overall_score`` (mean of the parsed scores, 0 when
            none could be parsed), ``detailed_scores``, ``suggestions`` and
            the raw ``assessment_text``.
        """
        assessment_prompt = self._build_assessment_prompt(
            source_text,
            translated_text,
            source_lang,
            target_lang,
            domain
        )
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1500,
            temperature=0.1,
            messages=[{
                "role": "user",
                "content": assessment_prompt
            }]
        )

        assessment_text = response.content[0].text
        scores = self._parse_assessment_scores(assessment_text)
        overall_score = sum(scores.values()) / len(scores) if scores else 0
        suggestions = self._generate_improvement_suggestions(
            scores,
            source_text,
            translated_text
        )
        return {
            'overall_score': overall_score,
            'detailed_scores': scores,
            'suggestions': suggestions,
            'assessment_text': assessment_text
        }

    def _build_assessment_prompt(
        self,
        source_text: str,
        translated_text: str,
        source_lang: str,
        target_lang: str,
        domain: Optional[str]
    ) -> str:
        """Build the grading prompt, including the expected output format."""
        lang_names = {
            'zh-CN': '简体中文',
            'en-US': '英语',
            'ja-JP': '日语'
        }
        # Locale codes without a display name are used verbatim.
        source_name = lang_names.get(source_lang, source_lang)
        target_name = lang_names.get(target_lang, target_lang)

        prompt = f"""请评估以下翻译质量。从{source_name}翻译到{target_name}:
原文:{source_text}
译文:{translated_text}
请从以下几个方面评分(1-10分,10分最高):
1. 准确性(Accuracy)- 翻译是否准确传达原文意思
2. 流畅性(Fluency)- 译文是否自然流畅
3. 完整性(Completeness)- 是否完整翻译了所有内容
4. 一致性(Consistency)- 术语和风格是否一致
5. 文化适应性(Cultural Appropriateness)- 是否适应目标语言文化
请按以下格式输出:
准确性:X分 - 评价
流畅性:X分 - 评价
完整性:X分 - 评价
一致性:X分 - 评价
文化适应性:X分 - 评价
总体评价:
改进建议:"""
        if domain:
            prompt += f"\n\n注意:这是{domain}领域的翻译,请特别关注专业术语的准确性。"
        return prompt

    def _parse_assessment_scores(self, assessment_text: str) -> Dict[str, float]:
        """Extract ``<label>:<number>分`` scores from the model's reply.

        Both the ASCII and the full-width colon are accepted, and whitespace
        is tolerated around the colon and before "分". Metrics that cannot be
        found are left out of the result.
        """
        scores = {}
        for metric, label in TranslationQualityAssessor._METRIC_LABELS.items():
            pattern = label + r'\s*[::]\s*(\d+(?:\.\d+)?)\s*分'
            match = re.search(pattern, assessment_text)
            if match:
                scores[metric] = float(match.group(1))
        return scores

    def _generate_improvement_suggestions(
        self,
        scores: Dict[str, float],
        source_text: str,
        translated_text: str
    ) -> List[str]:
        """Return canned advice for every metric scoring below its threshold.

        A metric missing from ``scores`` counts as 10 and yields no advice.
        The threshold is 8 for completeness and 7 for the other metrics.
        """
        # (metric, threshold, advice) in output order.
        rules = [
            ('accuracy', 7, "建议重新检查翻译的准确性,确保所有关键信息都被正确传达"),
            ('fluency', 7, "建议改进译文的流畅性,使其更符合目标语言的表达习惯"),
            ('completeness', 8, "请检查是否遗漏了任何内容,确保翻译的完整性"),
            ('consistency', 7, "建议统一术语翻译,保持整体风格的一致性"),
            ('cultural_appropriateness', 7, "建议考虑目标语言的文化背景,调整表达方式"),
        ]
        return [
            advice
            for metric, threshold, advice in rules
            if scores.get(metric, 10) < threshold
        ]
文化适配
1. 文化敏感内容处理
class CulturalAdaptationManager:
    """Check content against per-culture rules and adapt it where needed."""

    def __init__(self, client: Anthropic):
        self.client = client
        self.cultural_rules = self._load_cultural_rules()
        self.sensitive_topics = self._load_sensitive_topics()

    def _load_cultural_rules(self) -> Dict[str, Dict]:
        """Return the built-in rule table, keyed by locale code."""
        return {
            'zh-CN': {
                'formal_address': True,  # use honorific forms
                'avoid_direct_refusal': True,  # avoid refusing outright
                'prefer_indirect_communication': True,  # prefer indirect wording
                'respect_hierarchy': True,  # respect hierarchy
                'gift_giving_taboos': ['钟表', '白花'],
                'lucky_numbers': [6, 8, 9],
                'unlucky_numbers': [4, 7],
                'color_meanings': {
                    'red': 'fortune, joy',
                    'white': 'mourning, purity',
                    'black': 'evil, mourning'
                }
            },
            'ja-JP': {
                'formal_address': True,
                'bow_culture': True,
                'group_harmony': True,
                'avoid_direct_confrontation': True,
                'business_card_etiquette': True,
                'gift_wrapping_important': True,
                'unlucky_numbers': [4, 9],
                'seasonal_awareness': True
            },
            'ar-SA': {
                'islamic_considerations': True,
                'right_hand_preference': True,
                'modest_dress': True,
                'prayer_times': True,
                'halal_requirements': True,
                'family_honor': True,
                'gender_interactions': 'conservative'
            },
            'en-US': {
                'direct_communication': True,
                'individual_focus': True,
                'time_sensitive': True,
                'informal_acceptable': True,
                'diversity_awareness': True
            }
        }

    def _load_sensitive_topics(self) -> Dict[str, List[str]]:
        """Return the built-in sensitive-topic keywords per locale.

        The ``'general'`` entry applies to every culture.
        """
        return {
            'zh-CN': ['政治敏感', '历史争议', '社会敏感事件'],
            'ja-JP': ['历史问题', '战争相关', '等级制度'],
            'ar-SA': ['宗教争议', '政治制度', '社会改革'],
            'general': ['种族歧视', '性别歧视', '暴力内容']
        }

    def adapt_content(
        self,
        content: str,
        target_culture: str,
        content_type: str = 'general'
    ) -> Dict[str, Any]:
        """Adapt ``content`` for ``target_culture``.

        The content is rewritten by the model only when the sensitivity check
        reports issues; afterwards the rule-based text adjustments are applied.

        Returns:
            A dict with ``original_content``, ``adapted_content``,
            ``target_culture``, ``adaptations_made`` (the detected issues) and
            ``cultural_rules_applied`` (names of the culture's rules).
        """
        cultural_rules = self.cultural_rules.get(target_culture, {})

        sensitivity_check = self._check_cultural_sensitivity(
            content,
            target_culture
        )
        if sensitivity_check['has_issues']:
            adapted_content = self._adapt_sensitive_content(
                content,
                target_culture,
                sensitivity_check['issues']
            )
        else:
            adapted_content = content

        final_content = self._apply_cultural_rules(
            adapted_content,
            cultural_rules,
            content_type
        )
        return {
            'original_content': content,
            'adapted_content': final_content,
            'target_culture': target_culture,
            'adaptations_made': sensitivity_check.get('issues', []),
            'cultural_rules_applied': list(cultural_rules.keys())
        }

    def _check_cultural_sensitivity(
        self,
        content: str,
        culture: str
    ) -> Dict[str, Any]:
        """Report sensitive topics and unlucky numbers found in ``content``.

        Returns:
            ``{'has_issues': bool, 'issues': [str, ...]}``.
        """
        # Work on a copy: extending the stored list in place would append the
        # general topics to it again on every call.
        topics = list(self.sensitive_topics.get(culture, []))
        if culture != 'general':
            topics.extend(self.sensitive_topics.get('general', []))

        # Plain keyword check (a real application should use proper NLP).
        issues = [f"包含敏感话题: {topic}" for topic in topics if topic in content]

        unlucky_numbers = self.cultural_rules.get(culture, {}).get('unlucky_numbers', [])
        for number in unlucky_numbers:
            # Match the number only when it stands alone, not as a digit
            # inside a longer number such as the 4 in "2024".
            if re.search(r'(?<!\d)' + re.escape(str(number)) + r'(?!\d)', content):
                issues.append(f"包含不吉利数字: {number}")

        return {
            'has_issues': len(issues) > 0,
            'issues': issues
        }

    def _adapt_sensitive_content(
        self,
        content: str,
        culture: str,
        issues: List[str]
    ) -> str:
        """Ask the model to rewrite ``content`` so it avoids ``issues``."""
        issue_lines = "\n".join(f"- {issue}" for issue in issues)
        adaptation_prompt = f"""请将以下内容适配到{culture}文化背景,注意以下问题:
原内容:{content}
发现的文化敏感问题:
{issue_lines}
请提供文化适配后的内容,确保:
1. 尊重目标文化的价值观
2. 避免文化冲突
3. 保持内容的核心意思
4. 使用适当的表达方式
适配后的内容:"""
        response = self.client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1500,
            messages=[{
                "role": "user",
                "content": adaptation_prompt
            }]
        )
        return response.content[0].text.strip()

    def _apply_cultural_rules(
        self,
        content: str,
        rules: Dict[str, Any],
        content_type: str
    ) -> str:
        """Apply the rule-based text adjustments enabled in ``rules``."""
        modified_content = content
        # Formal address only for business/formal content.
        if rules.get('formal_address') and content_type in ['business', 'formal']:
            modified_content = self._apply_formal_address(modified_content)
        if rules.get('prefer_indirect_communication'):
            modified_content = self._apply_indirect_style(modified_content)
        return modified_content

    def _apply_formal_address(self, content: str) -> str:
        """Simplified example: replace the informal "你" with the polite "您"."""
        return content.replace('你', '您')

    def _apply_indirect_style(self, content: str) -> str:
        """Simplified example: soften blunt Chinese phrases."""
        softeners = [
            (r'不可以', '可能不太合适'),
            (r'错误', '可能需要重新考虑'),
            (r'必须', '建议'),
        ]
        for pattern, replacement in softeners:
            content = re.sub(pattern, replacement, content)
        return content
# Usage example
def create_multicultural_app(api_key: Optional[str] = None):
    """Wire the i18n components together and return a request handler.

    Args:
        api_key: Anthropic API key. When omitted, the client is created
            without an explicit key so the SDK can pick it up from its
            environment configuration instead of a hard-coded placeholder.

    Returns:
        ``process_user_input(user_input, target_language=None,
        target_culture=None)``, which returns a dict describing the response.
    """
    client = Anthropic(api_key=api_key) if api_key else Anthropic()

    # Components
    processor = MultilingualTextProcessor(client)
    adapter = LanguageAdapter(client)
    cultural_manager = CulturalAdaptationManager(client)

    # Localized UI strings
    localization = LocalizationManager()
    localization.load_localizations(LOCALIZATION_DATA)

    def process_user_input(
        user_input: str,
        target_language: Optional[str] = None,
        target_culture: Optional[str] = None
    ):
        """Process user input with full internationalization support."""
        # 1. Detect the language (and translate the input when a different,
        #    supported target language is requested). The detection result is
        #    reused below rather than running a second detection that could
        #    disagree with the first.
        processed = processor.process_multilingual_input(
            user_input,
            target_language
        )
        detected_lang = processed['detected_language']

        # 2. Generate the response.
        response = adapter.adapt_response(
            user_input,
            detected_lang,
            target_language
        )

        # 3. Cultural adaptation.
        if target_culture:
            cultural_adaptation = cultural_manager.adapt_content(
                response,
                target_culture
            )
            response = cultural_adaptation['adapted_content']

        # 4. Localized UI texts.
        ui_locale = target_language or detected_lang
        return {
            'detected_language': detected_lang,
            'response': response,
            'ui_texts': {
                'welcome': localization.get_text('welcome', ui_locale),
                'help': localization.get_text('help_text', ui_locale)
            },
            'cultural_adaptations': target_culture is not None,
            # Translation of the input, or None when none was produced.
            'translated_input': processed.get('translated_text')
        }

    return process_user_input
最佳实践
1. 国际化架构设计
# Internationalization best practices: topic -> {practice key: description}.
I18N_BEST_PRACTICES = {
'language_detection': {
'use_multiple_methods': '结合多种检测方法提高准确性',
'handle_mixed_languages': '处理混合语言文本',
'confidence_threshold': '设置置信度阈值',
'fallback_language': '设置默认回退语言'
},
'translation': {
'preserve_context': '保持上下文信息',
'domain_specific': '使用领域特定的翻译',
'quality_assurance': '实施翻译质量保证',
'consistency_check': '检查术语一致性'
},
'cultural_adaptation': {
'research_target_culture': '深入研究目标文化',
'avoid_stereotypes': '避免文化刻板印象',
'local_review': '使用本地人员审核',
'continuous_learning': '持续学习文化差异'
},
'technical_implementation': {
'unicode_support': '完整的Unicode支持',
'rtl_support': '支持从右到左的语言',
'date_time_localization': '日期时间本地化',
'number_formatting': '数字格式本地化'
}
}
class InternationalizationBestPractices:
    """Checks for whether an application covers the i18n building blocks."""

    @staticmethod
    def validate_i18n_implementation(app_components: Dict) -> Dict[str, bool]:
        """Report which i18n capabilities ``app_components`` provides.

        Args:
            app_components: Mapping of component name to component instance
                ('detector', 'translator', 'cultural_manager', 'localization',
                'adapter', 'quality_assessor').

        Returns:
            A dict of check name -> bool.
        """
        adapter = app_components.get('adapter')

        # RTL support is recognised either through an explicit ``rtl_support``
        # attribute or through a ``language_configs`` entry flagged with
        # ``'rtl': True``.
        language_configs = getattr(adapter, 'language_configs', None)
        declares_rtl_locale = isinstance(language_configs, dict) and any(
            isinstance(config, dict) and config.get('rtl')
            for config in language_configs.values()
        )
        supports_rtl = bool(hasattr(adapter, 'rtl_support') or declares_rtl_locale)

        return {
            'has_language_detection': 'detector' in app_components,
            'has_translation_system': 'translator' in app_components,
            'has_cultural_adaptation': 'cultural_manager' in app_components,
            'has_localization': 'localization' in app_components,
            'supports_rtl': supports_rtl,
            'has_quality_assessment': 'quality_assessor' in app_components
        }
这个国际化与本地化系统提供了全面的多语言、多文化支持,帮助开发者构建真正国际化的 AI 应用。