多线程中的共享对象
课程
代码中存在一个间歇性的bug,表现不一致。其间歇性的性质使得难以有效地重现和调试。
这种间歇性行为源于 `translate_markdown_file` 函数,特别是 `translate_front_matter` 函数,处理共享数据的方式。这些函数可能会在没有正确同步的情况下访问和修改共享的数据结构,如字典或列表。
当多个线程同时访问和修改相同的数据时,就会导致竞争条件。当数据的最终状态取决于不可预测的线程执行顺序时,就会出现竞争条件。这可能导致数据损坏、意外的程序行为,以及你观察到的间歇性bug。
要解决这个问题,你需要避免在线程之间共享可变数据,或者使用适当的同步机制(如锁)来保护共享数据。在这种情况下,`front_matter_dict` 被原地修改,这不是线程安全的。解决方法是在修改前创建字典的副本。这在代码中已经完成了,但重要的是理解为什么这是必要的。
上下文
# Submit one translation job per (changed file, target language) pair to a
# thread pool, then surface any exception a job raised.
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
    futures = []
    for filename in changed_files:
        input_file = filename
        for lang in languages:
            print(f"提交翻译任务为{filename}到{lang}...")
            # Output lives under _posts/<lang>/ and carries a "-<lang>" suffix.
            output_name = os.path.basename(filename).replace(".md", f"-{lang}.md")
            output_path = os.path.join(f"_posts/{lang}", output_name)
            futures.append(
                executor.submit(
                    translate_markdown_file,
                    input_file,
                    output_path,
                    lang,
                    dry_run,
                )
            )
    # result() re-raises whatever the worker raised; report it and keep going
    # so one failed job does not hide the outcome of the others.
    for future in concurrent.futures.as_completed(futures):
        try:
            future.result()
        except Exception as e:
            print(f"一个线程失败: {e}")
之前
def translate_front_matter(front_matter, target_language, input_file):
    """Translate the title in a YAML front matter and mark its language.

    The front matter is parsed with ``yaml.safe_load``. A string ``title`` is
    passed through ``translate_text`` (except for one hard-coded file/language
    combination). The document is then tagged: when ``target_language``
    differs from the front matter's ``lang`` (``'en'`` when absent), ``lang``
    is overwritten and ``translated`` is set to ``True``; otherwise
    ``translated`` is set to ``False``.

    Args:
        front_matter: Front matter text to parse as YAML.
        target_language: Language code to translate the title into.
        input_file: Path of the source file; used in log output and for the
            title-translation exception.

    Returns:
        ``""`` when ``front_matter`` is empty. On success, the re-serialized
        front matter wrapped in ``---`` lines. The original ``front_matter``
        unchanged when it cannot be parsed or does not parse to a mapping.
    """
    print(f" Translating front matter for: {input_file}")
    if not front_matter:
        print(f" No front matter found for: {input_file}")
        return ""
    try:
        front_matter_dict = yaml.safe_load(front_matter)
        print(f" Front matter after safe_load: {front_matter_dict}")

        # safe_load yields None for whitespace/comment-only input and may
        # yield a scalar or list; the key lookups below need a mapping.
        if not isinstance(front_matter_dict, dict):
            print(f" Front matter is not a mapping, leaving unchanged for: {input_file}")
            return front_matter

        if 'title' in front_matter_dict:
            title = front_matter_dict['title']
            print(f" Translating title: {title}")
            skip_title = (
                input_file == 'original/2025-01-11-resume-en.md'
                and target_language in ['zh', 'fr']
            )
            if skip_title:
                print(f" Skipping title translation for {input_file} to {target_language}")
            elif not isinstance(title, str):
                print(f" Title is not a string, skipping translation for: {input_file}")
            else:
                translated_title = translate_text(title, target_language)
                if translated_title:
                    translated_title = translated_title.strip()
                    # An overlong result is cut down to its first line.
                    if len(translated_title) > 300:
                        translated_title = translated_title.split('\n')[0]
                    front_matter_dict['title'] = translated_title
                    print(f" Translated title to: {translated_title}")
                else:
                    print(f" Title translation failed for: {input_file}")

        # Only relabel the document when the target differs from its
        # declared language (English when none is declared).
        original_lang = front_matter_dict.get('lang', 'en')
        if target_language != original_lang:
            front_matter_dict['lang'] = target_language
            front_matter_dict['translated'] = True
            print(f" Marked as translated to {target_language} for: {input_file}")
        else:
            front_matter_dict['translated'] = False
            print(f" Not marked as translated for: {input_file}")

        result = "---\n" + yaml.dump(front_matter_dict, allow_unicode=True) + "---"
        print(f" Front matter translation complete for: {input_file}")
        return result
    except yaml.YAMLError as e:
        print(f" Error parsing front matter: {e}")
        return front_matter
之后
def translate_front_matter(front_matter, target_language, input_file):
    """Translate the title in a YAML front matter and tag it as translated.

    The front matter is parsed with ``yaml.safe_load`` and all changes are
    made on a shallow copy of the parsed mapping. A string ``title`` is passed
    through ``translate_text`` (except for one hard-coded file/language
    combination). ``lang`` is always set to ``target_language`` and
    ``translated`` to ``True``.

    Args:
        front_matter: Front matter text to parse as YAML.
        target_language: Language code to translate the title into.
        input_file: Path of the source file; used in log output and for the
            title-translation exception.

    Returns:
        ``""`` when ``front_matter`` is empty. On success, the re-serialized
        front matter wrapped in ``---`` lines. The original ``front_matter``
        unchanged when it cannot be parsed or does not parse to a mapping.
    """
    print(f" Translating front matter for: {input_file}")
    if not front_matter:
        print(f" No front matter found for: {input_file}")
        return ""
    try:
        front_matter_dict = yaml.safe_load(front_matter)
        print(f" Front matter after safe_load: {front_matter_dict}")

        # safe_load yields None for whitespace/comment-only input and may
        # yield a scalar or list; .copy() and the key lookups need a mapping.
        if not isinstance(front_matter_dict, dict):
            print(f" Front matter is not a mapping, leaving unchanged for: {input_file}")
            return front_matter

        # Mutate a shallow copy so the parsed mapping itself stays untouched.
        front_matter_dict_copy = front_matter_dict.copy()

        if 'title' in front_matter_dict_copy:
            title = front_matter_dict_copy['title']
            print(f" Translating title: {title}")
            skip_title = (
                input_file == 'original/2025-01-11-resume-en.md'
                and target_language in ['zh', 'fr']
            )
            if skip_title:
                print(f" Skipping title translation for {input_file} to {target_language}")
            elif not isinstance(title, str):
                print(f" Title is not a string, skipping translation for: {input_file}")
            else:
                translated_title = translate_text(title, target_language)
                if translated_title:
                    translated_title = translated_title.strip()
                    # An overlong result is cut down to its first line.
                    if len(translated_title) > 300:
                        translated_title = translated_title.split('\n')[0]
                    front_matter_dict_copy['title'] = translated_title
                    print(f" Translated title to: {translated_title}")
                else:
                    print(f" Title translation failed for: {input_file}")

        # Always set lang to target_language
        front_matter_dict_copy['lang'] = target_language
        front_matter_dict_copy['translated'] = True

        result = "---\n" + yaml.dump(front_matter_dict_copy, allow_unicode=True) + "---"
        print(f" Front matter translation complete for: {input_file}")
        return result
    except yaml.YAMLError as e:
        print(f" Error parsing front matter: {e}")
        return front_matter