diff --git a/.gitignore b/.gitignore index d2386333..2286d460 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,12 @@ dist/ wheels/ *.egg-info tests/ +!tests/ +tests/* +!tests/test_workflow/ +tests/test_workflow/* +!tests/test_workflow/test_cad_workflow.py +!tests/test_workflow/test_cad_simple.dxf tests/resource/ tests/test_data/ htmlcov/ diff --git a/docutranslate/cad/__init__.py b/docutranslate/cad/__init__.py new file mode 100644 index 00000000..1113d18e --- /dev/null +++ b/docutranslate/cad/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: 2025 QinHan +# SPDX-License-Identifier: MPL-2.0 diff --git a/docutranslate/cad/dwg_converter.py b/docutranslate/cad/dwg_converter.py new file mode 100644 index 00000000..d0e6fe62 --- /dev/null +++ b/docutranslate/cad/dwg_converter.py @@ -0,0 +1,157 @@ +# SPDX-FileCopyrightText: 2025 QinHan +# SPDX-License-Identifier: MPL-2.0 +"""DWG ↔ DXF converter using external backends. + +Detects and invokes user-installed converters (LibreDWG, HaoChen, AutoCAD). +DocuTranslate does NOT bundle converters — users install them separately. 
@dataclass
class ConverterResult:
    """Outcome of one conversion attempt.

    ``output_path`` is only set when ``success`` is true, ``backend_used``
    names the backend that produced the output, and ``message`` carries the
    failure reason or an informational note.
    """

    success: bool
    input_path: str
    output_path: str | None = None
    backend_used: str = ""
    message: str = ""


class DwgConverter:
    """Detect and call external DWG ↔ DXF converters.

    Converters are looked up on ``PATH``; nothing is bundled.  Every public
    method reports problems through a :class:`ConverterResult` instead of
    raising.
    """

    # Backend name -> executable that is both detected and invoked.  The
    # COM backends are launched as ``<backend>_converter``, so detection
    # must look for exactly that name or auto-selection picks a backend
    # that can never run.
    BACKENDS = {
        "libredwg": "dwg2dxf",
        "haochen": "haochen_converter",
        "autocad": "autocad_converter",
    }

    # Backends driven through the ``--input/--output`` wrapper executable.
    _COM_BACKENDS = ("haochen", "autocad")

    def __init__(self, backend: str = "auto"):
        """Remember the preferred backend (``"auto"`` picks the first available)."""
        self.backend = backend.strip().lower()

    # ------------------------------------------------------------------
    # Detection
    # ------------------------------------------------------------------

    def detect_available(self) -> dict[str, bool]:
        """Return which backends are available on this system."""
        return {
            name: shutil.which(binary) is not None
            for name, binary in self.BACKENDS.items()
        }

    def _select_backend(self) -> str:
        """Return the configured backend, or the first detected one ("" if none)."""
        if self.backend and self.backend != "auto":
            return self.backend
        available = self.detect_available()
        for name in ("libredwg", "haochen", "autocad"):
            if available.get(name):
                return name
        return ""

    def _resolve_backend(self, backend: str) -> str:
        """Normalise a per-call override; empty or "auto" defers to the instance."""
        requested = (backend or "").strip().lower()
        if requested and requested != "auto":
            return requested
        return self._select_backend()

    # ------------------------------------------------------------------
    # Conversion
    # ------------------------------------------------------------------

    def dwg_to_dxf(self, input_path: str, output_dir: str, backend: str = "") -> ConverterResult:
        """Convert DWG to DXF. If input is already DXF, copy it.

        Args:
            input_path: Source ``.dwg`` (or ``.dxf``) file.
            output_dir: Directory that receives the ``.dxf``; created on demand.
            backend: Optional per-call backend override.

        Returns:
            A :class:`ConverterResult`; ``success`` is false when the file is
            missing, no usable backend exists, or the converter fails.
        """
        src = Path(input_path)
        if not src.exists():
            return ConverterResult(False, input_path, message=f"File not found: {input_path}")

        out_dir = Path(output_dir)
        if src.suffix.lower() == ".dxf":
            out = out_dir / src.name
            out_dir.mkdir(parents=True, exist_ok=True)
            # Copying a file onto itself raises SameFileError, so skip it.
            if src.resolve() != out.resolve():
                shutil.copy2(str(src), str(out))
            return ConverterResult(True, input_path, str(out), "dxf_only", "Input is already DXF")

        be = self._resolve_backend(backend)
        if not be:
            return ConverterResult(
                False, input_path,
                message="No DWG converter available. Install LibreDWG (dwg2dxf), HaoChen CAD, or AutoCAD.",
            )
        if be == "dxf_only":
            # "dxf_only" is an advertised setting meaning "never convert".
            return ConverterResult(
                False, input_path,
                message="Backend 'dxf_only' cannot convert DWG files; "
                        "provide a DXF file or choose libredwg, haochen, or autocad.",
            )
        if be not in self.BACKENDS:
            return ConverterResult(False, input_path, message=f"Unknown backend: {be}")

        out_dir.mkdir(parents=True, exist_ok=True)
        if be == "libredwg":
            return self._run_libredwg(src, out_dir)
        return self._run_com_backend(src, out_dir, be)

    def dxf_to_dwg(self, input_path: str, output_dir: str, backend: str = "") -> ConverterResult:
        """Convert DXF to DWG (only supported by some backends)."""
        src = Path(input_path)
        if not src.exists():
            return ConverterResult(False, input_path, message=f"File not found: {input_path}")

        be = self._resolve_backend(backend)
        if be in self._COM_BACKENDS:
            out_dir = Path(output_dir)
            # The converter writes into this directory, so it must exist.
            out_dir.mkdir(parents=True, exist_ok=True)
            return self._run_com_backend_dxf2dwg(src, out_dir, be)

        return ConverterResult(False, input_path, message="DXF→DWG conversion requires HaoChen or AutoCAD backend")

    # ------------------------------------------------------------------
    # Backend implementations
    # ------------------------------------------------------------------

    def _invoke(
        self,
        binary: str,
        arguments: list[str],
        src: Path,
        out: Path,
        backend: str,
        label: str,
        timeout: int,
    ) -> ConverterResult:
        """Run one converter executable and translate the outcome into a result."""
        exe = shutil.which(binary)
        if not exe:
            return ConverterResult(False, str(src), message=f"{binary} not found in PATH")
        try:
            subprocess.run([exe, *arguments], check=True, capture_output=True, timeout=timeout)
        except subprocess.CalledProcessError as e:
            stderr = (e.stderr or b"").decode(errors="replace")
            return ConverterResult(False, str(src), message=f"{label} failed: {stderr}")
        except subprocess.TimeoutExpired:
            return ConverterResult(False, str(src), message=f"{label} conversion timed out")
        except OSError as e:
            # e.g. the executable exists but cannot be started.
            return ConverterResult(False, str(src), message=f"{label} could not be started: {e}")
        if not out.exists():
            # Exit code 0 without an output file must not be reported as success.
            return ConverterResult(
                False, str(src),
                message=f"{label} reported success but produced no output file: {out}",
            )
        return ConverterResult(True, str(src), str(out), backend)

    def _run_libredwg(self, src: Path, out_dir: Path) -> ConverterResult:
        """Convert with LibreDWG's ``dwg2dxf -o <out> <src>``."""
        out = out_dir / f"{src.stem}.dxf"
        return self._invoke(
            self.BACKENDS["libredwg"], ["-o", str(out), str(src)],
            src, out, "libredwg", "LibreDWG", 120,
        )

    def _run_com_backend(self, src: Path, out_dir: Path, backend: str) -> ConverterResult:
        """Convert DWG→DXF through the ``<backend>_converter`` executable."""
        out = out_dir / f"{src.stem}.dxf"
        binary = self.BACKENDS.get(backend, f"{backend}_converter")
        return self._invoke(
            binary, ["--input", str(src), "--output", str(out)],
            src, out, backend, backend, 300,
        )

    def _run_com_backend_dxf2dwg(self, src: Path, out_dir: Path, backend: str) -> ConverterResult:
        """Convert DXF→DWG through the ``<backend>_converter`` executable."""
        out = out_dir / f"{src.stem}.dwg"
        binary = self.BACKENDS.get(backend, f"{backend}_converter")
        return self._invoke(
            binary, ["--input", str(src), "--output", str(out), "--format", "dwg"],
            src, out, backend, backend, 300,
        )
logger = logging.getLogger(__name__)

SUPPORTED_ENTITY_TYPES = ("TEXT", "MTEXT", "ATTDEF", "ATTRIB")


@dataclass
class ApplyResult:
    """Outcome of :meth:`CadTextApplier.apply`."""

    success: bool
    translated_count: int = 0
    message: str = ""


class CadTextApplier:
    """Apply translations back to DXF files."""

    # Floor (in drawing units) used when shrinking text.
    _MIN_HEIGHT = 1.0
    # Fallback when an entity carries no usable height.
    _DEFAULT_HEIGHT = 2.5

    def apply(
        self,
        dxf_path: str,
        output_path: str,
        translation_map: dict[str, str],
        mode: str = "replace",
        font_name: str = "Times New Roman",
        font_size_reduction: int = 2,
    ) -> ApplyResult:
        """Write translations into a copy of ``dxf_path`` saved at ``output_path``.

        Args:
            dxf_path: DXF file to read.
            output_path: Destination file; parent directories are created.
            translation_map: Original text -> translated text.
            mode: ``"replace"`` overwrites the original text; any other value
                adds the translation as a new TEXT entity below the original.
            font_name: Font used for the text style of translated text.
            font_size_reduction: Amount subtracted from the original height.

        Returns:
            An :class:`ApplyResult`; ``success`` is false when ezdxf is not
            installed, the input is missing, or the DXF cannot be read.
        """
        try:
            import ezdxf
        except ImportError:
            return ApplyResult(
                False, message="ezdxf not installed. Install with: pip install docutranslate[cad]"
            )

        src = Path(dxf_path)
        out = Path(output_path)
        if not src.exists():
            return ApplyResult(False, message=f"File not found: {dxf_path}")

        try:
            doc = ezdxf.readfile(str(src))
        except Exception as e:
            return ApplyResult(False, message=f"Cannot read DXF: {e}")

        replace_mode = mode == "replace"
        translated_count = 0

        def _process_space(space):
            nonlocal translated_count
            # Snapshot the entities: non-replace mode adds new TEXT entities
            # to the same space while we iterate.
            for entity in list(space):
                try:
                    if self._translate_entity(space, entity, translation_map, font_name, replace_mode, font_size_reduction, doc):
                        translated_count += 1
                except Exception as e:
                    logger.debug("Entity translate failed: %s", e)

        _process_space(doc.modelspace())
        for layout in doc.layouts:
            if layout.name != "Model":
                _process_space(layout)
        try:
            for block in doc.blocks:
                # Names starting with "*" are skipped here.
                if not block.name.startswith("*"):
                    _process_space(block)
        except Exception as e:
            logger.debug("Block translate failed: %s", e)

        out.parent.mkdir(parents=True, exist_ok=True)
        doc.saveas(str(out))
        logger.info("Applied %d translations to %s", translated_count, out.name)
        return ApplyResult(True, translated_count, f"Translated {translated_count} entities")

    def _smart_match(self, text: str, translation_map: dict[str, str]) -> str | None:
        """Find the translation for ``text``, tolerating whitespace differences.

        Tries an exact key match first, then compares keys after (1) removing
        all whitespace, (2) collapsing whitespace runs, (3) stripping the
        ends.  Blank translations are ignored.  Returns ``None`` if nothing
        matches.
        """
        # Function-scope import: the module's import block does not provide ``re``.
        import re

        exact = translation_map.get(text)
        if exact is not None and exact.strip():
            return exact

        normalizers = (
            lambda s: re.sub(r"\s+", "", s),
            lambda s: re.sub(r"\s+", " ", s.strip()),
            lambda s: s.strip(),
        )
        for normalize in normalizers:
            wanted = normalize(text)
            for original, translated in translation_map.items():
                if translated.strip() and normalize(original) == wanted:
                    return translated
        return None

    @staticmethod
    def _dxf_value(entity, name: str, default=None):
        """Read ``entity.dxf.<name>`` and return ``default`` when it is unavailable.

        NOTE(review): ezdxf is understood to raise its own DXFAttributeError
        (not AttributeError) for attributes an entity type does not support,
        which ``getattr(..., default)`` does not absorb - hence the broad
        ``except``.  Confirm against the ezdxf version in use.
        """
        try:
            value = getattr(entity.dxf, name, default)
        except Exception:
            return default
        return default if value is None else value

    @classmethod
    def _reduced_height(cls, height: float, reduction: float) -> float:
        """Shrink ``height`` by ``reduction`` without enlarging small text.

        The result is floored at ``_MIN_HEIGHT``, but text that is already
        smaller than the floor keeps its own height instead of being blown
        up to the floor.
        """
        reduced = height - reduction
        if reduced >= cls._MIN_HEIGHT:
            return reduced
        return min(height, cls._MIN_HEIGHT)

    @staticmethod
    def _ensure_style(font_name: str, doc) -> str:
        """Return the text style name for ``font_name``, creating the style if needed."""
        style_name = f"TStyle_{font_name.replace(' ', '_')}"
        if style_name not in doc.styles:
            style = doc.styles.add(style_name, font=font_name)
            style.dxf.bigfont = ""
        return style_name

    def _set_font(self, entity, font_name: str, doc) -> None:
        """Point ``entity`` at the style for ``font_name`` (best effort)."""
        try:
            entity.dxf.style = self._ensure_style(font_name, doc)
        except Exception as e:
            logger.debug("Set font failed: %s", e)

    def _translate_entity(self, owner, entity, translation_map, font_name, replace_mode, font_size_reduction, doc) -> bool:
        """Translate one entity; return True when a translation was applied.

        Unsupported entity types, blank text, and text without a translation
        return False.  Any failure is logged at debug level and also returns
        False, so one bad entity never aborts the whole drawing.
        """
        entity_type = entity.dxftype()
        if entity_type not in SUPPORTED_ENTITY_TYPES:
            return False

        try:
            is_mtext = entity_type == "MTEXT"
            if is_mtext:
                # MTEXT content is read from the entity itself, not from the
                # ``dxf`` namespace.
                original_text = getattr(entity, "text", None)
            elif entity_type == "TEXT":
                original_text = self._dxf_value(entity, "text")
            else:
                original_text = self._dxf_value(entity, "text") or self._dxf_value(entity, "tag")

            if not original_text or not original_text.strip():
                return False

            translated = self._smart_match(original_text.strip(), translation_map)
            if not translated:
                return False

            height_attr = "char_height" if is_mtext else "height"
            height = float(self._dxf_value(entity, height_attr) or self._DEFAULT_HEIGHT)

            if replace_mode:
                new_height = self._reduced_height(height, font_size_reduction)
                if is_mtext:
                    entity.text = translated
                    entity.dxf.char_height = new_height
                else:
                    entity.dxf.text = translated
                    entity.dxf.height = new_height
                self._set_font(entity, font_name, doc)
            else:
                self._add_text_below(owner, entity, translated, font_name, height, font_size_reduction, doc)

            return True
        except Exception as e:
            logger.debug("Translate entity failed: %s", e)
            return False

    def _add_text_below(self, owner, original_entity, translated_text, font_name, original_height, font_size_reduction, doc):
        """Add ``translated_text`` as a new TEXT entity one line below the original.

        "Below" is measured in the original text's own frame: the offset
        ``(0, -1.2 * height)`` is rotated by the entity's rotation angle.
        """
        try:
            insert_point = self._dxf_value(original_entity, "insert", (0, 0, 0))
            layer = self._dxf_value(original_entity, "layer", "0")
            rotation = float(self._dxf_value(original_entity, "rotation", 0))

            offset_y = -original_height * 1.2
            rotation_rad = math.radians(rotation)
            # Rotating the vector (0, offset_y) by the angle gives
            # (-offset_y * sin, offset_y * cos).
            dx = -offset_y * math.sin(rotation_rad)
            dy = offset_y * math.cos(rotation_rad)

            new_x = float(insert_point[0]) + dx
            new_y = float(insert_point[1]) + dy
            new_z = float(insert_point[2]) if len(insert_point) > 2 else 0.0

            attribs = {
                "insert": (new_x, new_y, new_z),
                "height": self._reduced_height(original_height, font_size_reduction),
                "layer": layer,
                "rotation": rotation,
                "color": 1,
                "style": self._ensure_style(font_name, doc),
            }
            owner.add_text(translated_text, dxfattribs=attribs)
        except Exception as e:
            logger.debug("Add text below failed: %s", e)
Install with: pip install docutranslate[cad]" + ) + + path = Path(dxf_path) + if not path.exists(): + return ExtractionResult(False, message=f"File not found: {dxf_path}") + + try: + doc = ezdxf.readfile(str(path)) + except Exception as e: + return ExtractionResult(False, message=f"Cannot read DXF: {e}") + + entities: list[TextEntity] = [] + + # Model space + entities.extend(self._extract_space(doc.modelspace(), "ModelSpace")) + + # Paper space layouts + for layout in doc.layouts: + if layout.name != "Model": + entities.extend(self._extract_space(layout, f"PaperSpace_{layout.name}")) + + # Block definitions + try: + for block in doc.blocks: + if not block.name.startswith("*"): + entities.extend(self._extract_space(block, f"Block_{block.name}")) + except Exception as e: + logger.debug("Block extraction failed: %s", e) + + logger.info("Extracted %d text entities from %s", len(entities), path.name) + return ExtractionResult(True, entities, f"Extracted {len(entities)} text entities") + + def _extract_space(self, space, space_name: str) -> list[TextEntity]: + entities = [] + for entity in space: + try: + text_entity = self._extract_entity(entity, space_name) + if text_entity: + entities.append(text_entity) + except Exception as e: + logger.debug("Entity extraction failed: %s", e) + return entities + + def _extract_entity(self, entity, space_name: str) -> TextEntity | None: + entity_type = entity.dxftype() + if entity_type not in SUPPORTED_ENTITY_TYPES: + return None + + try: + if entity_type in ("TEXT", "MTEXT"): + text_content = entity.dxf.text + else: + text_content = getattr(entity.dxf, "text", None) or getattr(entity.dxf, "tag", None) + + if not text_content or not text_content.strip(): + return None + + insert_point = getattr(entity.dxf, "insert", (0, 0, 0)) + height = getattr(entity.dxf, "height", None) or getattr(entity.dxf, "char_height", 2.5) + layer = getattr(entity.dxf, "layer", "0") + rotation = getattr(entity.dxf, "rotation", 0) + + return TextEntity( + 
text=text_content.strip(), + entity_type=entity_type, + layer=layer, + insert_x=float(insert_point[0]), + insert_y=float(insert_point[1]), + insert_z=float(insert_point[2]) if len(insert_point) > 2 else 0.0, + height=float(height), + rotation=float(rotation), + space=space_name, + ) + except Exception as e: + logger.debug("Entity parse failed: %s", e) + return None + + def to_translation_records(self, result: ExtractionResult) -> list[dict[str, Any]]: + """Convert extraction result to records format for translation.""" + records = [] + for i, entity in enumerate(result.entities): + records.append({ + "record_id": f"cad_{i}", + "source_text": entity.text, + "_entity_type": entity.entity_type, + "_layer": entity.layer, + "_space": entity.space, + }) + return records diff --git a/docutranslate/core/factory.py b/docutranslate/core/factory.py index 3ba5475d..3fed468f 100644 --- a/docutranslate/core/factory.py +++ b/docutranslate/core/factory.py @@ -7,7 +7,7 @@ from docutranslate.agents.glossary_agent import GlossaryAgentConfig from docutranslate.core.schemas import TranslatePayload, MarkdownWorkflowParams, TextWorkflowParams, JsonWorkflowParams, \ XlsxWorkflowParams, DocxWorkflowParams, SrtWorkflowParams, EpubWorkflowParams, HtmlWorkflowParams, \ - AssWorkflowParams, PPTXWorkflowParams + AssWorkflowParams, PPTXWorkflowParams, CadWorkflowParams from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig from docutranslate.converter.x2md.converter_mineru_deploy import ConverterMineruDeployConfig @@ -141,4 +141,28 @@ def build_glossary_agent_config(): return WorkClass(config=workflow_config) + # CAD Workflow (special handling) + if isinstance(payload, CadWorkflowParams): + from docutranslate.workflow.cad_workflow import CadWorkflowConfig, CadWorkflow + + translator_args = payload.model_dump( + include={"skip_translate", "base_url", "api_key", "model_id", "to_lang", 
class CadWorkflowParams(BaseWorkflowParams):
    """Request parameters selecting and configuring the CAD (DWG/DXF) workflow.

    Fields:
        workflow_type: Discriminator; must be ``"cad"``.
        insert_mode: How translated text is written back.
        cad_converter_backend: External DWG converter to use.
        font_name: Font applied to translated text.
        font_size_reduction: Amount subtracted from the original text height.
    """

    workflow_type: Literal["cad"] = Field(
        ...,
        description="指定使用CAD文件(DWG/DXF)的翻译工作流。",
    )
    insert_mode: Literal["replace", "append", "prepend"] = Field(
        default="replace",
        description="翻译文本的插入模式。'replace':替换原文,'append':附加到原文后,'prepend':附加到原文前。",
    )
    cad_converter_backend: str = Field(
        default="auto",
        description="DWG转换后端: auto, libredwg, haochen, autocad, dxf_only",
    )
    font_name: str = Field(
        default="Times New Roman",
        description="输出字体名称。",
    )
    font_size_reduction: int = Field(
        default=2,
        description="字号缩小量。",
    )
@dataclass(kw_only=True)
class CadWorkflowConfig(WorkflowConfig):
    """Settings for :class:`CadWorkflow`."""

    translator_config: TXTTranslatorConfig
    cad_converter_backend: str = "auto"
    insert_mode: str = "replace"
    font_name: str = "Times New Roman"
    font_size_reduction: int = 2


class CadWorkflow(Workflow[CadWorkflowConfig, Document, Document]):
    """Translate CAD files (DWG/DXF).

    Pipeline:
    1. If DWG, convert to DXF using available backend
    2. Extract text entities from DXF
    3. Translate extracted text via LLM
    4. Write translated text back to DXF
    """

    def __init__(self, config: CadWorkflowConfig):
        super().__init__(config=config)
        self._translator: TXTTranslator | None = None
        self._dxf_path: Path | None = None
        self._extraction_result = None
        # Translated text (one line per extracted entity).  Kept separately
        # because apply_translations_to_dxf() replaces document_translated
        # with the written DXF file.
        self._translated_text: str | None = None
        # Owns the directory holding a converted DXF so it is removed with
        # the workflow instead of being left behind.
        self._tmp_dir: tempfile.TemporaryDirectory | None = None
        if config.logger:
            config.translator_config.logger = config.logger

    def _ensure_dxf(self, file_path: str) -> Path:
        """Convert DWG→DXF if needed, or return DXF path directly.

        Raises:
            RuntimeError: If the DWG conversion fails.
        """
        src = Path(file_path)
        if src.suffix.lower() == ".dxf":
            return src

        converter = DwgConverter(backend=self.config.cad_converter_backend)
        tmp_dir = tempfile.TemporaryDirectory(prefix="docutranslate_cad_")
        result = converter.dwg_to_dxf(str(src), tmp_dir.name)
        if not result.success or not result.output_path:
            tmp_dir.cleanup()
            raise RuntimeError(f"DWG conversion failed: {result.message}")

        # Drop the directory of a previously loaded file, if any.
        if self._tmp_dir is not None:
            self._tmp_dir.cleanup()
        self._tmp_dir = tmp_dir
        return Path(result.output_path)

    def read_path(self, path: Path | str) -> Self:
        """Read CAD file (DWG or DXF). Converts DWG→DXF if needed.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
            RuntimeError: If conversion or extraction fails, or the file
                contains no translatable text.
        """
        file_path = Path(path)
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {path}")

        self._dxf_path = self._ensure_dxf(str(file_path))
        self._translated_text = None

        # Extract text from DXF
        extractor = CadTextExtractor()
        self._extraction_result = extractor.extract(str(self._dxf_path))
        if not self._extraction_result.success:
            raise RuntimeError(f"Text extraction failed: {self._extraction_result.message}")

        if not self._extraction_result.entities:
            raise RuntimeError("No translatable text found in CAD file")

        # One line per entity: translations are mapped back by line index,
        # so a line break inside an entity's text would shift every later
        # entity.  Flatten such breaks to spaces.
        texts = [
            e.text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
            for e in self._extraction_result.entities
        ]
        content = "\n".join(texts).encode("utf-8")
        self.document_original = Document.from_bytes(content=content, suffix=".txt", stem="cad_texts")
        return self

    def _start_translation(self) -> tuple[TXTTranslator, Document]:
        """Report progress and build the translator plus a working copy of the text."""
        if getattr(self, "document_original", None) is None:
            raise RuntimeError("No CAD file loaded. Call read_path() first.")

        self.progress_tracker.update(percent=10, message="Extracting text...")
        self.progress_tracker.update(percent=30, message="Translating...")

        # Use the configured translator settings as-is so glossary, rate
        # limit, provider and logger settings are not lost.
        translator = TXTTranslator(self.config.translator_config)
        self._translator = translator
        return translator, self.document_original.copy()

    def _finish_translation(self, translator: TXTTranslator, doc: Document) -> None:
        """Store the translated text, save the glossary, and report completion."""
        self.document_translated = doc
        self._translated_text = doc.content.decode("utf-8")

        # Save glossary
        if translator.glossary.glossary_dict:
            self.progress_tracker.update(percent=95, message="Saving glossary...")
            self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary.glossary_dict))

        self.progress_tracker.update(percent=100, message="Translation complete")

    def translate(self) -> Self:
        """Translate extracted CAD text via LLM."""
        translator, doc = self._start_translation()
        translator.translate(doc)
        self._finish_translation(translator, doc)
        return self

    async def translate_async(self) -> Self:
        """Async version of translate."""
        translator, doc = self._start_translation()
        await translator.translate_async(doc)
        self._finish_translation(translator, doc)
        return self

    def apply_translations_to_dxf(
        self,
        output_path: str,
        mode: str = "",
        font_name: str = "",
        font_size_reduction: int | None = None,
    ) -> Self:
        """Apply translated text back to DXF file.

        Empty/``None`` arguments fall back to the workflow configuration.
        On success ``document_translated`` becomes the written DXF file.

        Raises:
            RuntimeError: If no file was loaded, no translation exists, no
                translation could be mapped, or writing the DXF fails.
        """
        if not self._extraction_result or not self._dxf_path:
            raise RuntimeError("No CAD file loaded. Call read_path() first.")

        if not self.document_translated:
            raise RuntimeError("No translation available. Call translate() first.")

        # Build translation map from original entities and translated text.
        # Prefer the stored text: after a previous call document_translated
        # holds DXF bytes, not the translated lines.
        translated_text = self._translated_text
        if translated_text is None:
            translated_text = self.document_translated.content.decode("utf-8")
        translated_lines = translated_text.split("\n")

        entities = self._extraction_result.entities
        if len(translated_lines) != len(entities) and self.config.logger:
            self.config.logger.warning(
                "CAD translation returned %d lines for %d text entities; "
                "translations may be misaligned",
                len(translated_lines), len(entities),
            )

        translation_map: dict[str, str] = {}
        for entity, line in zip(entities, translated_lines):
            translated = line.strip()
            if translated:
                translation_map[entity.text] = translated

        if not translation_map:
            raise RuntimeError("No translations generated")

        applier = CadTextApplier()
        result = applier.apply(
            dxf_path=str(self._dxf_path),
            output_path=output_path,
            translation_map=translation_map,
            mode=mode or self.config.insert_mode,
            font_name=font_name or self.config.font_name,
            font_size_reduction=font_size_reduction if font_size_reduction is not None else self.config.font_size_reduction,
        )

        if not result.success:
            raise RuntimeError(f"Apply failed: {result.message}")

        self.document_translated = Document.from_path(output_path)
        return self

    def get_statistics(self) -> dict:
        """Return the translator's statistics, or ``{}`` before any translation."""
        if self._translator:
            return self._translator.get_statistics()
        return {}
+ 8 +0 + 9 +$CELTYPE + 6 +ByLayer + 9 +$CECOLOR + 62 +256 + 9 +$CELTSCALE + 40 +1.0 + 9 +$DISPSILH + 70 +0 + 9 +$DIMSCALE + 40 +1.0 + 9 +$DIMASZ + 40 +2.5 + 9 +$DIMEXO + 40 +0.625 + 9 +$DIMDLI + 40 +3.75 + 9 +$DIMRND + 40 +0.0 + 9 +$DIMDLE + 40 +0.0 + 9 +$DIMEXE + 40 +1.25 + 9 +$DIMTP + 40 +0.0 + 9 +$DIMTM + 40 +0.0 + 9 +$DIMTXT + 40 +2.5 + 9 +$DIMCEN + 40 +2.5 + 9 +$DIMTSZ + 40 +0.0 + 9 +$DIMTOL + 70 +0 + 9 +$DIMLIM + 70 +0 + 9 +$DIMTIH + 70 +0 + 9 +$DIMTOH + 70 +0 + 9 +$DIMSE1 + 70 +0 + 9 +$DIMSE2 + 70 +0 + 9 +$DIMTAD + 70 +1 + 9 +$DIMZIN + 70 +8 + 9 +$DIMBLK + 1 + + 9 +$DIMASO + 70 +1 + 9 +$DIMSHO + 70 +1 + 9 +$DIMPOST + 1 + + 9 +$DIMAPOST + 1 + + 9 +$DIMALT + 70 +0 + 9 +$DIMALTD + 70 +3 + 9 +$DIMALTF + 40 +0.03937007874 + 9 +$DIMLFAC + 40 +1.0 + 9 +$DIMTOFL + 70 +1 + 9 +$DIMTVP + 40 +0.0 + 9 +$DIMTIX + 70 +0 + 9 +$DIMSOXD + 70 +0 + 9 +$DIMSAH + 70 +0 + 9 +$DIMBLK1 + 1 + + 9 +$DIMBLK2 + 1 + + 9 +$DIMSTYLE + 2 +ISO-25 + 9 +$DIMCLRD + 70 +0 + 9 +$DIMCLRE + 70 +0 + 9 +$DIMCLRT + 70 +0 + 9 +$DIMTFAC + 40 +1.0 + 9 +$DIMGAP + 40 +0.625 + 9 +$DIMJUST + 70 +0 + 9 +$DIMSD1 + 70 +0 + 9 +$DIMSD2 + 70 +0 + 9 +$DIMTOLJ + 70 +0 + 9 +$DIMTZIN + 70 +8 + 9 +$DIMALTZ + 70 +0 + 9 +$DIMALTTZ + 70 +0 + 9 +$DIMUPT + 70 +0 + 9 +$DIMDEC + 70 +2 + 9 +$DIMTDEC + 70 +2 + 9 +$DIMALTU + 70 +2 + 9 +$DIMALTTD + 70 +3 + 9 +$DIMTXSTY + 7 +Standard + 9 +$DIMAUNIT + 70 +0 + 9 +$DIMADEC + 70 +0 + 9 +$DIMALTRND + 40 +0.0 + 9 +$DIMAZIN + 70 +0 + 9 +$DIMDSEP + 70 +44 + 9 +$DIMATFIT + 70 +3 + 9 +$DIMFRAC + 70 +0 + 9 +$DIMLDRBLK + 1 + + 9 +$DIMLUNIT + 70 +2 + 9 +$DIMLWD + 70 +-2 + 9 +$DIMLWE + 70 +-2 + 9 +$DIMTMOVE + 70 +0 + 9 +$DIMFXL + 40 +1.0 + 9 +$DIMFXLON + 70 +0 + 9 +$DIMJOGANG + 40 +0.785398163397 + 9 +$DIMTFILL + 70 +0 + 9 +$DIMTFILLCLR + 70 +0 + 9 +$DIMARCSYM + 70 +0 + 9 +$DIMLTYPE + 6 + + 9 +$DIMLTEX1 + 6 + + 9 +$DIMLTEX2 + 6 + + 9 +$DIMTXTDIRECTION + 70 +0 + 9 +$LUNITS + 70 +2 + 9 +$LUPREC + 70 +4 + 9 +$SKETCHINC + 40 +1.0 + 9 +$FILLETRAD + 40 +10.0 + 9 +$AUNITS + 70 +0 + 9 +$AUPREC + 70 +2 
+ 9 +$MENU + 1 +. + 9 +$ELEVATION + 40 +0.0 + 9 +$PELEVATION + 40 +0.0 + 9 +$THICKNESS + 40 +0.0 + 9 +$LIMCHECK + 70 +0 + 9 +$CHAMFERA + 40 +0.0 + 9 +$CHAMFERB + 40 +0.0 + 9 +$CHAMFERC + 40 +0.0 + 9 +$CHAMFERD + 40 +0.0 + 9 +$SKPOLY + 70 +0 + 9 +$TDCREATE + 40 +2461203.8739699074 + 9 +$TDUCREATE + 40 +2458532.153996898 + 9 +$TDUPDATE + 40 +2461203.8739699074 + 9 +$TDUUPDATE + 40 +2458532.1544311 + 9 +$TDINDWG + 40 +0.0 + 9 +$TDUSRTIMER + 40 +0.0 + 9 +$USRTIMER + 70 +1 + 9 +$ANGBASE + 50 +0.0 + 9 +$ANGDIR + 70 +0 + 9 +$PDMODE + 70 +0 + 9 +$PDSIZE + 40 +0.0 + 9 +$PLINEWID + 40 +0.0 + 9 +$SPLFRAME + 70 +0 + 9 +$SPLINETYPE + 70 +6 + 9 +$SPLINESEGS + 70 +8 + 9 +$HANDSEED + 5 +35 + 9 +$SURFTAB1 + 70 +6 + 9 +$SURFTAB2 + 70 +6 + 9 +$SURFTYPE + 70 +6 + 9 +$SURFU + 70 +6 + 9 +$SURFV + 70 +6 + 9 +$UCSBASE + 2 + + 9 +$UCSNAME + 2 + + 9 +$UCSORG + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$UCSXDIR + 10 +1.0 + 20 +0.0 + 30 +0.0 + 9 +$UCSYDIR + 10 +0.0 + 20 +1.0 + 30 +0.0 + 9 +$UCSORTHOREF + 2 + + 9 +$UCSORTHOVIEW + 70 +0 + 9 +$UCSORGTOP + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$UCSORGBOTTOM + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$UCSORGLEFT + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$UCSORGRIGHT + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$UCSORGFRONT + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$UCSORGBACK + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$PUCSBASE + 2 + + 9 +$PUCSNAME + 2 + + 9 +$PUCSORG + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$PUCSXDIR + 10 +1.0 + 20 +0.0 + 30 +0.0 + 9 +$PUCSYDIR + 10 +0.0 + 20 +1.0 + 30 +0.0 + 9 +$PUCSORTHOREF + 2 + + 9 +$PUCSORTHOVIEW + 70 +0 + 9 +$PUCSORGTOP + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$PUCSORGBOTTOM + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$PUCSORGLEFT + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$PUCSORGRIGHT + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$PUCSORGFRONT + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$PUCSORGBACK + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$USERI1 + 70 +0 + 9 +$USERI2 + 70 +0 + 9 +$USERI3 + 70 +0 + 9 +$USERI4 + 70 +0 + 9 +$USERI5 + 70 +0 + 9 +$USERR1 + 40 +0.0 + 9 +$USERR2 + 40 +0.0 + 9 +$USERR3 + 40 +0.0 + 9 
+$USERR4 + 40 +0.0 + 9 +$USERR5 + 40 +0.0 + 9 +$WORLDVIEW + 70 +1 + 9 +$SHADEDGE + 70 +3 + 9 +$SHADEDIF + 70 +70 + 9 +$TILEMODE + 70 +1 + 9 +$MAXACTVP + 70 +64 + 9 +$PINSBASE + 10 +0.0 + 20 +0.0 + 30 +0.0 + 9 +$PLIMCHECK + 70 +0 + 9 +$PEXTMIN + 10 +1e+20 + 20 +1e+20 + 30 +1e+20 + 9 +$PEXTMAX + 10 +-1e+20 + 20 +-1e+20 + 30 +-1e+20 + 9 +$PLIMMIN + 10 +0.0 + 20 +0.0 + 9 +$PLIMMAX + 10 +420.0 + 20 +297.0 + 9 +$UNITMODE + 70 +0 + 9 +$VISRETAIN + 70 +1 + 9 +$PLINEGEN + 70 +0 + 9 +$PSLTSCALE + 70 +1 + 9 +$TREEDEPTH + 70 +3020 + 9 +$CMLSTYLE + 2 +Standard + 9 +$CMLJUST + 70 +0 + 9 +$CMLSCALE + 40 +20.0 + 9 +$PROXYGRAPHICS + 70 +1 + 9 +$MEASUREMENT + 70 +1 + 9 +$CELWEIGHT +370 +-1 + 9 +$ENDCAPS +280 +0 + 9 +$JOINSTYLE +280 +0 + 9 +$LWDISPLAY +290 +0 + 9 +$INSUNITS + 70 +6 + 9 +$HYPERLINKBASE + 1 + + 9 +$STYLESHEET + 1 + + 9 +$XEDIT +290 +1 + 9 +$CEPSNTYPE +380 +0 + 9 +$PSTYLEMODE +290 +1 + 9 +$FINGERPRINTGUID + 2 +{A1C618C0-8A80-459C-B665-48F68F170B96} + 9 +$VERSIONGUID + 2 +{DE892FC3-25F5-4FF4-AB47-7E8732880CEA} + 9 +$EXTNAMES +290 +1 + 9 +$PSVPSCALE + 40 +0.0 + 9 +$OLESTARTUP +290 +0 + 9 +$SORTENTS +280 +127 + 9 +$INDEXCTL +280 +0 + 9 +$HIDETEXT +280 +1 + 9 +$XCLIPFRAME +280 +1 + 9 +$HALOGAP +280 +0 + 9 +$OBSCOLOR + 70 +257 + 9 +$OBSLTYPE +280 +0 + 9 +$INTERSECTIONDISPLAY +280 +0 + 9 +$INTERSECTIONCOLOR + 70 +257 + 9 +$DIMASSOC +280 +2 + 9 +$PROJECTNAME + 1 + + 9 +$CAMERADISPLAY +290 +0 + 9 +$LENSLENGTH + 40 +50.0 + 9 +$CAMERAHEIGHT + 40 +0.0 + 9 +$STEPSPERSEC + 40 +24.0 + 9 +$STEPSIZE + 40 +100.0 + 9 +$3DDWFPREC + 40 +2.0 + 9 +$PSOLWIDTH + 40 +0.005 + 9 +$PSOLHEIGHT + 40 +0.08 + 9 +$LOFTANG1 + 40 +1.570796326795 + 9 +$LOFTANG2 + 40 +1.570796326795 + 9 +$LOFTMAG1 + 40 +0.0 + 9 +$LOFTMAG2 + 40 +0.0 + 9 +$LOFTPARAM + 70 +7 + 9 +$LOFTNORMALS +280 +1 + 9 +$LATITUDE + 40 +37.795 + 9 +$LONGITUDE + 40 +-122.394 + 9 +$NORTHDIRECTION + 40 +0.0 + 9 +$TIMEZONE + 70 +-8000 + 9 +$LIGHTGLYPHDISPLAY +280 +1 + 9 +$TILEMODELIGHTSYNCH +280 +1 + 9 +$CMATERIAL +347 +20 + 9 +$SOLIDHIST +280 
+0 + 9 +$SHOWHIST +280 +1 + 9 +$DWFFRAME +280 +2 + 9 +$DGNFRAME +280 +2 + 9 +$REALWORLDSCALE +290 +1 + 9 +$INTERFERECOLOR + 62 +256 + 9 +$CSHADOW +280 +0 + 9 +$SHADOWPLANELOCATION + 40 +0.0 + 0 +ENDSEC + 0 +SECTION + 2 +CLASSES + 0 +CLASS + 1 +ACDBDICTIONARYWDFLT + 2 +AcDbDictionaryWithDefault + 3 +ObjectDBX Classes + 90 +0 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +SUN + 2 +AcDbSun + 3 +SCENEOE + 90 +1153 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +VISUALSTYLE + 2 +AcDbVisualStyle + 3 +ObjectDBX Classes + 90 +4095 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +MATERIAL + 2 +AcDbMaterial + 3 +ObjectDBX Classes + 90 +1153 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +SCALE + 2 +AcDbScale + 3 +ObjectDBX Classes + 90 +1153 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +TABLESTYLE + 2 +AcDbTableStyle + 3 +ObjectDBX Classes + 90 +4095 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +MLEADERSTYLE + 2 +AcDbMLeaderStyle + 3 +ACDB_MLEADERSTYLE_CLASS + 90 +4095 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +DICTIONARYVAR + 2 +AcDbDictionaryVar + 3 +ObjectDBX Classes + 90 +0 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +CELLSTYLEMAP + 2 +AcDbCellStyleMap + 3 +ObjectDBX Classes + 90 +1152 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +MENTALRAYRENDERSETTINGS + 2 +AcDbMentalRayRenderSettings + 3 +SCENEOE + 90 +1024 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +ACDBDETAILVIEWSTYLE + 2 +AcDbDetailViewStyle + 3 +ObjectDBX Classes + 90 +1025 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +ACDBSECTIONVIEWSTYLE + 2 +AcDbSectionViewStyle + 3 +ObjectDBX Classes + 90 +1025 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +RASTERVARIABLES + 2 +AcDbRasterVariables + 3 +ISM + 90 +0 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +ACDBPLACEHOLDER + 2 +AcDbPlaceHolder + 3 +ObjectDBX Classes + 90 +0 + 91 +0 +280 +0 +281 +0 + 0 +CLASS + 1 +LAYOUT + 2 +AcDbLayout + 3 +ObjectDBX Classes + 90 +0 + 91 +0 +280 +0 +281 +0 + 0 +ENDSEC + 0 +SECTION + 2 +TABLES + 0 +TABLE + 2 +VPORT + 5 +8 +330 +0 +100 +AcDbSymbolTable + 70 +1 + 0 +VPORT + 5 +23 +330 +8 +100 +AcDbSymbolTableRecord +100 
+AcDbViewportTableRecord + 2 +*Active + 70 +0 + 10 +0.0 + 20 +0.0 + 11 +1.0 + 21 +1.0 + 12 +0.0 + 22 +0.0 + 13 +0.0 + 23 +0.0 + 14 +0.5 + 24 +0.5 + 15 +0.5 + 25 +0.5 + 16 +0.0 + 26 +0.0 + 36 +1.0 + 17 +0.0 + 27 +0.0 + 37 +0.0 + 40 +1000.0 + 41 +1.34 + 42 +50.0 + 43 +0.0 + 44 +0.0 + 50 +0.0 + 51 +0.0 + 71 +0 + 72 +1000 + 73 +1 + 74 +3 + 75 +0 + 76 +0 + 77 +0 + 78 +0 +281 +0 + 65 +0 +146 +0.0 + 0 +ENDTAB + 0 +TABLE + 2 +LTYPE + 5 +2 +330 +0 +100 +AcDbSymbolTable + 70 +3 + 0 +LTYPE + 5 +24 +330 +2 +100 +AcDbSymbolTableRecord +100 +AcDbLinetypeTableRecord + 2 +ByBlock + 70 +0 + 3 + + 72 +65 + 73 +0 + 40 +0.0 + 0 +LTYPE + 5 +25 +330 +2 +100 +AcDbSymbolTableRecord +100 +AcDbLinetypeTableRecord + 2 +ByLayer + 70 +0 + 3 + + 72 +65 + 73 +0 + 40 +0.0 + 0 +LTYPE + 5 +26 +330 +2 +100 +AcDbSymbolTableRecord +100 +AcDbLinetypeTableRecord + 2 +Continuous + 70 +0 + 3 + + 72 +65 + 73 +0 + 40 +0.0 + 0 +ENDTAB + 0 +TABLE + 2 +LAYER + 5 +1 +330 +0 +100 +AcDbSymbolTable + 70 +2 + 0 +LAYER + 5 +27 +330 +1 +100 +AcDbSymbolTableRecord +100 +AcDbLayerTableRecord + 2 +0 + 70 +0 + 62 +7 + 6 +Continuous +370 +-3 +390 +13 +347 +21 + 0 +LAYER + 5 +28 +330 +1 +100 +AcDbSymbolTableRecord +100 +AcDbLayerTableRecord + 2 +Defpoints + 70 +0 + 62 +7 + 6 +Continuous +290 +0 +370 +-3 +390 +13 +347 +21 + 0 +ENDTAB + 0 +TABLE + 2 +STYLE + 5 +5 +330 +0 +100 +AcDbSymbolTable + 70 +1 + 0 +STYLE + 5 +29 +330 +5 +100 +AcDbSymbolTableRecord +100 +AcDbTextStyleTableRecord + 2 +Standard + 70 +0 + 40 +0.0 + 41 +1.0 + 50 +0.0 + 71 +0 + 42 +2.5 + 3 +txt + 4 + + 0 +ENDTAB + 0 +TABLE + 2 +VIEW + 5 +7 +330 +0 +100 +AcDbSymbolTable + 70 +0 + 0 +ENDTAB + 0 +TABLE + 2 +UCS + 5 +6 +330 +0 +100 +AcDbSymbolTable + 70 +0 + 0 +ENDTAB + 0 +TABLE + 2 +APPID + 5 +3 +330 +0 +100 +AcDbSymbolTable + 70 +3 + 0 +APPID + 5 +2A +330 +3 +100 +AcDbSymbolTableRecord +100 +AcDbRegAppTableRecord + 2 +ACAD + 70 +0 + 0 +APPID + 5 +32 +330 +3 +100 +AcDbSymbolTableRecord +100 +AcDbRegAppTableRecord + 2 +HATCHBACKGROUNDCOLOR + 70 +0 + 0 +APPID + 
5 +33 +330 +3 +100 +AcDbSymbolTableRecord +100 +AcDbRegAppTableRecord + 2 +EZDXF + 70 +0 + 0 +ENDTAB + 0 +TABLE + 2 +DIMSTYLE + 5 +4 +330 +0 +100 +AcDbSymbolTable + 70 +1 +100 +AcDbDimStyleTable + 0 +DIMSTYLE +105 +2B +330 +4 +100 +AcDbSymbolTableRecord +100 +AcDbDimStyleTableRecord + 2 +Standard + 70 +0 + 40 +1.0 + 41 +2.5 + 42 +0.625 + 43 +3.75 + 44 +1.25 + 45 +0.0 + 46 +0.0 + 47 +0.0 + 48 +0.0 + 49 +2.5 +140 +2.5 +141 +2.5 +142 +0.0 +143 +0.03937007874 +144 +1.0 +145 +0.0 +146 +1.0 +147 +0.625 +148 +0.0 + 69 +0 + 70 +0 + 71 +0 + 72 +0 + 73 +0 + 74 +0 + 75 +0 + 76 +0 + 77 +1 + 78 +8 + 79 +3 +170 +0 +171 +3 +172 +1 +173 +0 +174 +0 +175 +0 +176 +0 +177 +0 +178 +0 +179 +2 +271 +2 +272 +2 +273 +2 +274 +3 +275 +0 +276 +0 +277 +2 +278 +44 +279 +0 +280 +0 +281 +0 +282 +0 +283 +0 +284 +8 +285 +0 +286 +0 +288 +0 +289 +3 +290 +0 +371 +-2 +372 +-2 + 0 +ENDTAB + 0 +TABLE + 2 +BLOCK_RECORD + 5 +9 +330 +0 +100 +AcDbSymbolTable + 70 +2 + 0 +BLOCK_RECORD + 5 +17 +330 +9 +100 +AcDbSymbolTableRecord +100 +AcDbBlockTableRecord + 2 +*Model_Space +340 +1A + 70 +0 +280 +1 +281 +0 + 0 +BLOCK_RECORD + 5 +1B +330 +9 +100 +AcDbSymbolTableRecord +100 +AcDbBlockTableRecord + 2 +*Paper_Space +340 +1E + 70 +0 +280 +1 +281 +0 + 0 +ENDTAB + 0 +ENDSEC + 0 +SECTION + 2 +BLOCKS + 0 +BLOCK + 5 +18 +330 +17 +100 +AcDbEntity + 8 +0 +100 +AcDbBlockBegin + 2 +*Model_Space + 70 +0 + 10 +0.0 + 20 +0.0 + 30 +0.0 + 3 +*Model_Space + 1 + + 0 +ENDBLK + 5 +19 +330 +17 +100 +AcDbEntity + 8 +0 +100 +AcDbBlockEnd + 0 +BLOCK + 5 +1C +330 +1B +100 +AcDbEntity + 8 +0 +100 +AcDbBlockBegin + 2 +*Paper_Space + 70 +0 + 10 +0.0 + 20 +0.0 + 30 +0.0 + 3 +*Paper_Space + 1 + + 0 +ENDBLK + 5 +1D +330 +1B +100 +AcDbEntity + 8 +0 +100 +AcDbBlockEnd + 0 +ENDSEC + 0 +SECTION + 2 +ENTITIES + 0 +TEXT + 5 +2F +330 +17 +100 +AcDbEntity + 8 +0 +100 +AcDbText + 10 +0.0 + 20 +0.0 + 30 +0.0 + 40 +2.5 + 1 +Hello World +100 +AcDbText + 0 +TEXT + 5 +30 +330 +17 +100 +AcDbEntity + 8 +0 +100 +AcDbText + 10 +10.0 + 20 +5.0 + 30 +0.0 + 40 +3.0 
+ 1 +Test Label +100 +AcDbText + 0 +MTEXT + 5 +31 +330 +17 +100 +AcDbEntity + 8 +0 +100 +AcDbMText + 10 +20.0 + 20 +10.0 + 30 +0.0 + 40 +2.0 + 71 +1 + 1 +This is a multi-line text\PSecond line + 0 +ENDSEC + 0 +SECTION + 2 +OBJECTS + 0 +DICTIONARY + 5 +A +330 +0 +100 +AcDbDictionary +281 +1 + 3 +ACAD_COLOR +350 +B + 3 +ACAD_GROUP +350 +C + 3 +ACAD_LAYOUT +350 +D + 3 +ACAD_MATERIAL +350 +E + 3 +ACAD_MLEADERSTYLE +350 +F + 3 +ACAD_MLINESTYLE +350 +10 + 3 +ACAD_PLOTSETTINGS +350 +11 + 3 +ACAD_PLOTSTYLENAME +350 +12 + 3 +ACAD_SCALELIST +350 +14 + 3 +ACAD_TABLESTYLE +350 +15 + 3 +ACAD_VISUALSTYLE +350 +16 + 3 +EZDXF_META +350 +2D + 0 +DICTIONARY + 5 +B +330 +A +100 +AcDbDictionary +281 +1 + 0 +DICTIONARY + 5 +C +330 +A +100 +AcDbDictionary +281 +1 + 0 +DICTIONARY + 5 +D +330 +A +100 +AcDbDictionary +281 +1 + 3 +Model +350 +1A + 3 +Layout1 +350 +1E + 0 +DICTIONARY + 5 +E +330 +A +100 +AcDbDictionary +281 +1 + 3 +ByBlock +350 +1F + 3 +ByLayer +350 +20 + 3 +Global +350 +21 + 0 +DICTIONARY + 5 +F +330 +A +100 +AcDbDictionary +281 +1 + 3 +Standard +350 +2C + 0 +DICTIONARY + 5 +10 +330 +A +100 +AcDbDictionary +281 +1 + 3 +Standard +350 +22 + 0 +DICTIONARY + 5 +11 +330 +A +100 +AcDbDictionary +281 +1 + 0 +ACDBDICTIONARYWDFLT + 5 +12 +330 +A +100 +AcDbDictionary +281 +1 + 3 +Normal +350 +13 +100 +AcDbDictionaryWithDefault +340 +13 + 0 +ACDBPLACEHOLDER + 5 +13 +330 +12 + 0 +DICTIONARY + 5 +14 +330 +A +100 +AcDbDictionary +281 +1 + 0 +DICTIONARY + 5 +15 +330 +A +100 +AcDbDictionary +281 +1 + 0 +DICTIONARY + 5 +16 +330 +A +100 +AcDbDictionary +281 +1 + 0 +LAYOUT + 5 +1A +330 +D +100 +AcDbPlotSettings + 1 + + 4 +A3 + 6 + + 40 +7.5 + 41 +20.0 + 42 +7.5 + 43 +20.0 + 44 +420.0 + 45 +297.0 + 46 +0.0 + 47 +0.0 + 48 +0.0 + 49 +0.0 +140 +0.0 +141 +0.0 +142 +1.0 +143 +1.0 + 70 +1024 + 72 +1 + 73 +0 + 74 +5 + 7 + + 75 +16 + 76 +0 + 77 +2 + 78 +300 +147 +1.0 +148 +0.0 +149 +0.0 +100 +AcDbLayout + 1 +Model + 70 +1 + 71 +0 + 10 +0.0 + 20 +0.0 + 11 +420.0 + 21 +297.0 + 12 +0.0 + 22 +0.0 + 32 
+0.0 + 14 +1e+20 + 24 +1e+20 + 34 +1e+20 + 15 +-1e+20 + 25 +-1e+20 + 35 +-1e+20 +146 +0.0 + 13 +0.0 + 23 +0.0 + 33 +0.0 + 16 +1.0 + 26 +0.0 + 36 +0.0 + 17 +0.0 + 27 +1.0 + 37 +0.0 + 76 +1 +330 +17 + 0 +LAYOUT + 5 +1E +330 +D +100 +AcDbPlotSettings + 1 + + 4 +A3 + 6 + + 40 +7.5 + 41 +20.0 + 42 +7.5 + 43 +20.0 + 44 +420.0 + 45 +297.0 + 46 +0.0 + 47 +0.0 + 48 +0.0 + 49 +0.0 +140 +0.0 +141 +0.0 +142 +1.0 +143 +1.0 + 70 +0 + 72 +1 + 73 +0 + 74 +5 + 7 + + 75 +16 + 76 +0 + 77 +2 + 78 +300 +147 +1.0 +148 +0.0 +149 +0.0 +100 +AcDbLayout + 1 +Layout1 + 70 +1 + 71 +1 + 10 +0.0 + 20 +0.0 + 11 +420.0 + 21 +297.0 + 12 +0.0 + 22 +0.0 + 32 +0.0 + 14 +1e+20 + 24 +1e+20 + 34 +1e+20 + 15 +-1e+20 + 25 +-1e+20 + 35 +-1e+20 +146 +0.0 + 13 +0.0 + 23 +0.0 + 33 +0.0 + 16 +1.0 + 26 +0.0 + 36 +0.0 + 17 +0.0 + 27 +1.0 + 37 +0.0 + 76 +1 +330 +1B + 0 +MATERIAL + 5 +1F +102 +{ACAD_REACTORS +330 +E +102 +} +330 +E +100 +AcDbMaterial + 1 +ByBlock + 2 + + 70 +0 + 40 +1.0 + 71 +1 + 41 +1.0 + 91 +-1023410177 + 42 +1.0 + 72 +1 + 3 + + 73 +1 + 74 +1 + 75 +1 + 44 +0.5 + 73 +0 + 45 +1.0 + 46 +1.0 + 77 +1 + 4 + + 78 +1 + 79 +1 +170 +1 + 48 +1.0 +171 +1 + 6 + +172 +1 +173 +1 +174 +1 +140 +1.0 +141 +1.0 +175 +1 + 7 + +176 +1 +177 +1 +178 +1 +143 +1.0 +179 +1 + 8 + +270 +1 +271 +1 +272 +1 +145 +1.0 +146 +1.0 +273 +1 + 9 + +274 +1 +275 +1 +276 +1 + 42 +1.0 + 72 +1 + 3 + + 73 +1 + 74 +1 + 75 +1 + 94 +63 + 0 +MATERIAL + 5 +20 +102 +{ACAD_REACTORS +330 +E +102 +} +330 +E +100 +AcDbMaterial + 1 +ByLayer + 2 + + 70 +0 + 40 +1.0 + 71 +1 + 41 +1.0 + 91 +-1023410177 + 42 +1.0 + 72 +1 + 3 + + 73 +1 + 74 +1 + 75 +1 + 44 +0.5 + 73 +0 + 45 +1.0 + 46 +1.0 + 77 +1 + 4 + + 78 +1 + 79 +1 +170 +1 + 48 +1.0 +171 +1 + 6 + +172 +1 +173 +1 +174 +1 +140 +1.0 +141 +1.0 +175 +1 + 7 + +176 +1 +177 +1 +178 +1 +143 +1.0 +179 +1 + 8 + +270 +1 +271 +1 +272 +1 +145 +1.0 +146 +1.0 +273 +1 + 9 + +274 +1 +275 +1 +276 +1 + 42 +1.0 + 72 +1 + 3 + + 73 +1 + 74 +1 + 75 +1 + 94 +63 + 0 +MATERIAL + 5 +21 +102 +{ACAD_REACTORS +330 +E +102 +} +330 +E 
+100 +AcDbMaterial + 1 +Global + 2 + + 70 +0 + 40 +1.0 + 71 +1 + 41 +1.0 + 91 +-1023410177 + 42 +1.0 + 72 +1 + 3 + + 73 +1 + 74 +1 + 75 +1 + 44 +0.5 + 73 +0 + 45 +1.0 + 46 +1.0 + 77 +1 + 4 + + 78 +1 + 79 +1 +170 +1 + 48 +1.0 +171 +1 + 6 + +172 +1 +173 +1 +174 +1 +140 +1.0 +141 +1.0 +175 +1 + 7 + +176 +1 +177 +1 +178 +1 +143 +1.0 +179 +1 + 8 + +270 +1 +271 +1 +272 +1 +145 +1.0 +146 +1.0 +273 +1 + 9 + +274 +1 +275 +1 +276 +1 + 42 +1.0 + 72 +1 + 3 + + 73 +1 + 74 +1 + 75 +1 + 94 +63 + 0 +MLINESTYLE + 5 +22 +102 +{ACAD_REACTORS +330 +10 +102 +} +330 +10 +100 +AcDbMlineStyle + 2 +Standard + 70 +0 + 3 + + 62 +256 + 51 +90.0 + 52 +90.0 + 71 +2 + 49 +0.5 + 62 +256 + 6 +BYLAYER + 49 +-0.5 + 62 +256 + 6 +BYLAYER + 0 +MLEADERSTYLE + 5 +2C +102 +{ACAD_REACTORS +330 +F +102 +} +330 +F +100 +AcDbMLeaderStyle +179 +2 +170 +2 +171 +1 +172 +0 + 90 +2 + 40 +0.0 + 41 +0.0 +173 +1 + 91 +-1056964608 + 92 +-2 +290 +1 + 42 +2.0 +291 +1 + 43 +8.0 + 3 +Standard + 44 +4.0 +300 + +342 +29 +174 +1 +175 +1 +176 +0 +178 +1 + 93 +-1056964608 + 45 +4.0 +292 +0 +297 +0 + 46 +4.0 + 94 +-1056964608 + 47 +1.0 + 49 +1.0 +140 +1.0 +294 +1 +141 +0.0 +177 +0 +142 +1.0 +295 +0 +296 +0 +143 +3.75 +271 +0 +272 +9 +273 +9 + 0 +DICTIONARY + 5 +2D +330 +A +100 +AcDbDictionary +280 +1 +281 +1 + 3 +CREATED_BY_EZDXF +350 +2E + 3 +WRITTEN_BY_EZDXF +350 +34 + 0 +DICTIONARYVAR + 5 +2E +330 +2D +100 +DictionaryVariables +280 +0 + 1 +1.4.2 @ 2026-06-11T12:58:31.109748+00:00 + 0 +DICTIONARYVAR + 5 +34 +330 +2D +100 +DictionaryVariables +280 +0 + 1 +1.4.2 @ 2026-06-11T12:58:31.110750+00:00 + 0 +ENDSEC + 0 +EOF diff --git a/tests/test_workflow/test_cad_workflow.py b/tests/test_workflow/test_cad_workflow.py new file mode 100644 index 00000000..1e254d29 --- /dev/null +++ b/tests/test_workflow/test_cad_workflow.py @@ -0,0 +1,205 @@ +import shutil +import pytest +from pathlib import Path +from unittest.mock import patch + +from docutranslate.cad.text_extractor import CadTextExtractor +from docutranslate.cad.text_applier 
import CadTextApplier  # completes the "from docutranslate.cad.text_applier" statement begun on the previous line
from docutranslate.cad.dwg_converter import DwgConverter

# Sample drawing files are looked up next to this test module.
TESTS_DIR = Path(__file__).parent
TEST_DWG = TESTS_DIR.joinpath("test_cad_input.dwg")
TEST_DXF = TESTS_DIR.joinpath("test_cad_simple.dxf")


def _libredwg_available() -> bool:
    """Return True when a ``dwg2dxf`` executable can be located on PATH."""
    located = shutil.which("dwg2dxf")
    return located is not None


# Skip marker for tests that need both the dwg2dxf binary and the sample DWG file.
requires_libredwg = pytest.mark.skipif(
    not (_libredwg_available() and TEST_DWG.exists()),
    reason="LibreDWG (dwg2dxf) or test DWG file not available",
)


# ======================================================================
# CadTextExtractor
# ======================================================================

def test_extractor_returns_error_without_ezdxf():
    """Extraction fails with an ezdxf/install hint when ezdxf cannot be imported."""
    # A None entry in sys.modules makes any `import ezdxf` raise ImportError.
    with patch.dict("sys.modules", {"ezdxf": None}):
        outcome = CadTextExtractor().extract("nonexistent.dxf")
        assert outcome.success is False
        lowered = outcome.message.lower()
        assert "ezdxf" in lowered or "install" in lowered


def test_extractor_returns_error_for_missing_file():
    """Extracting from a path that does not exist fails with a "not found" message."""
    outcome = CadTextExtractor().extract("nonexistent.dxf")
    assert outcome.success is False
    assert "not found" in outcome.message.lower()


# ======================================================================
# CadTextApplier
# ======================================================================

def test_applier_returns_error_without_ezdxf():
    """Applying translations fails with an ezdxf/install hint when ezdxf cannot be imported."""
    # A None entry in sys.modules makes any `import ezdxf` raise ImportError.
    with patch.dict("sys.modules", {"ezdxf": None}):
        outcome = CadTextApplier().apply("a.dxf", "b.dxf", {"hello": "world"})
        assert outcome.success is False
        lowered = outcome.message.lower()
        assert "ezdxf" in lowered or "install" in lowered


def test_applier_returns_error_for_missing_file():
    """Applying translations to a non-existent input fails with a "not found" message."""
    outcome = CadTextApplier().apply("nonexistent.dxf", "output.dxf", {"hello": "world"})
    assert outcome.success is False
    assert "not found" in outcome.message.lower()


#
# ======================================================================
# DwgConverter
# ======================================================================

def test_converter_dxf_passthrough(tmp_path):
    """A ``.dxf`` input with the ``dxf_only`` backend succeeds and yields an existing file."""
    source_dxf = tmp_path / "test.dxf"
    source_dxf.write_text("DXF content", encoding="utf-8")
    target_dir = tmp_path / "output"

    result = DwgConverter(backend="dxf_only").dwg_to_dxf(str(source_dxf), str(target_dir))

    assert result.success is True
    produced = result.output_path
    assert produced is not None
    assert Path(produced).exists()


def test_converter_no_backend(tmp_path):
    """A ``.dwg`` input with the ``dxf_only`` backend fails and the message says why."""
    fake_dwg = tmp_path / "test.dwg"
    fake_dwg.write_text("fake dwg", encoding="utf-8")
    target_dir = tmp_path / "output"

    result = DwgConverter(backend="dxf_only").dwg_to_dxf(str(fake_dwg), str(target_dir))

    assert result.success is False
    lowered = result.message.lower()
    assert "dxf_only" in lowered or "already dxf" in lowered


def test_converter_detect_available():
    """``detect_available`` returns a dict that has a ``libredwg`` entry."""
    availability = DwgConverter().detect_available()
    assert isinstance(availability, dict)
    assert "libredwg" in availability


def test_converter_missing_file():
    """Converting a path that does not exist fails with a "not found" message."""
    result = DwgConverter().dwg_to_dxf("nonexistent.dwg", "output")
    assert result.success is False
    assert "not found" in result.message.lower()


# ======================================================================
# Integration Tests (require LibreDWG + test DWG file)
# ======================================================================

@requires_libredwg
def test_dwg_to_dxf_conversion(tmp_path):
    """Converting the sample DWG via the ``libredwg`` backend yields an existing ``.dxf`` file."""
    destination = str(tmp_path / "output")
    result = DwgConverter(backend="libredwg").dwg_to_dxf(str(TEST_DWG), destination)

    assert result.success is True, f"Conversion failed: {result.message}"
    assert result.output_path is not None
    dxf_path = Path(result.output_path)
    assert dxf_path.exists()
    assert result.output_path.endswith(".dxf")
    assert result.backend_used == "libredwg"


def test_extract_text_from_dxf():
    """The sample DXF yields at least two non-empty text entities, including the known strings."""
    if not TEST_DXF.exists():
        pytest.skip("Test DXF file not found")

    result = CadTextExtractor().extract(str(TEST_DXF))

    assert result.success is True, f"Extraction failed: {result.message}"
    assert len(result.entities) >= 2, f"Expected at least 2 entities, got {len(result.entities)}"

    extracted_strings = [entity.text for entity in result.entities]
    for expected in ("Hello World", "Test Label"):
        assert expected in extracted_strings

    known_types = ("TEXT", "MTEXT", "ATTDEF", "ATTRIB")
    for entity in result.entities:
        assert entity.text, "Entity text should not be empty"
        assert entity.entity_type in known_types


def test_apply_translations_to_dxf(tmp_path):
    """Translations applied to TEXT entities can be read back from the written DXF."""
    if not TEST_DXF.exists():
        pytest.skip("Test DXF file not found")

    source_path = str(TEST_DXF)
    extract_result = CadTextExtractor().extract(source_path)
    assert extract_result.success is True

    # Only TEXT entities are mapped; each translation carries a recognisable prefix.
    translation_map = {
        entity.text: f"翻译_{entity.text}"
        for entity in extract_result.entities
        if entity.entity_type == "TEXT"
    }

    output_dxf = tmp_path / "translated.dxf"
    apply_result = CadTextApplier().apply(source_path, str(output_dxf), translation_map)

    assert apply_result.success is True, f"Apply failed: {apply_result.message}"
    assert apply_result.translated_count > 0
    assert output_dxf.exists()

    # Re-extract from the output to confirm the prefix is present in the written file.
    verify_result = CadTextExtractor().extract(str(output_dxf))
    assert verify_result.success is True
    texts_after = [entity.text for entity in verify_result.entities]
    assert any(text.startswith("翻译_") for text in texts_after), "No translated text found"


def test_full_cad_pipeline(tmp_path):
    """End to end on the sample DXF: extract, tag every text, apply in replace mode, re-extract."""
    if not TEST_DXF.exists():
        pytest.skip("Test DXF file not found")

    source_path = str(TEST_DXF)
    extract_result = CadTextExtractor().extract(source_path)
    assert extract_result.success is True
    assert len(extract_result.entities) > 0

    # Stand-in for a real translation step: prefix every extracted string.
    translation_map = {
        entity.text: f"[CN] {entity.text}" for entity in extract_result.entities
    }

    final_dxf = tmp_path / "final.dxf"
    apply_result = CadTextApplier().apply(
        source_path,
        str(final_dxf),
        translation_map,
        mode="replace",
        font_name="SimSun",
    )
    assert apply_result.success is True
    assert final_dxf.exists()

    verify_result = CadTextExtractor().extract(str(final_dxf))
    assert verify_result.success is True
    has_translated = False
    for entity in verify_result.entities:
        if entity.text.startswith("[CN]"):
            has_translated = True
            break
    assert has_translated, "No translated text found in output DXF"