ctoolbox/formats/eite/
transform.rs

1use anyhow::{Result, bail};
2
3use crate::formats::eite::dc::data::dc_data_filter_by_value;
4use crate::formats::eite::formats::{DcOutputLanguage, PrefilterSettings};
5
/// A document-level transformation that can be applied to a Dc array.
///
/// Each variant carries the output language that is handed to the matching
/// converter when the transformation is applied.
pub enum DocumentTransformation {
    /// Dispatches to `dct_semantic_to_text` with the stored language.
    SemanticToText { lang: DcOutputLanguage },
    /// Dispatches to `dct_code_to_text` with the stored language.
    CodeToText { lang: DcOutputLanguage },
}
10
11impl DocumentTransformation {
12    pub fn apply(&self, dc_array_in: &[u32]) -> Result<Vec<u32>> {
13        apply_document_transformation(self, dc_array_in)
14    }
15
16    pub fn from_str(transform: &str, lang: DcOutputLanguage) -> Result<Self> {
17        match transform {
18            "semanticToText" => {
19                Ok(DocumentTransformation::SemanticToText { lang })
20            }
21            "codeToText" => Ok(DocumentTransformation::CodeToText { lang }),
22            _ => bail!("Unknown document transformation: {transform}"),
23        }
24    }
25
26    pub fn semantic_to_text_default() -> Self {
27        DocumentTransformation::SemanticToText {
28            lang: DcOutputLanguage::default(),
29        }
30    }
31
32    pub fn code_to_text_default() -> Self {
33        DocumentTransformation::CodeToText {
34            lang: DcOutputLanguage::default(),
35        }
36    }
37}
38
39impl std::fmt::Display for DocumentTransformation {
40    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
41        match self {
42            DocumentTransformation::SemanticToText { lang } => {
43                writeln!(f, "Semantic to Text ({lang}):")?;
44            }
45            DocumentTransformation::CodeToText { lang } => {
46                writeln!(f, "Code to Text ({lang}):")?;
47            }
48        }
49        Ok(())
50    }
51}
52
/// Return the document transformation names reported by the Dc data lookup.
///
/// The result is whatever `dc_data_filter_by_value` yields for the query
/// `("formats", 6, "transformation", 1)`.
///
/// NOTE(review): presumably this selects field 1 of each row in the
/// `formats` dataset whose field 6 equals `"transformation"` — confirm
/// against `dc_data_filter_by_value`.
///
/// This list comes from data, whereas `is_supported_document_transformation`
/// checks a hard-coded pair of names; the two are not guaranteed to agree.
pub fn list_document_transformations() -> Vec<String> {
    dc_data_filter_by_value("formats", 6, "transformation", 1)
}
56
/// Report whether `transform` names a supported document transformation.
///
/// The supported set is a fixed, case-sensitive list (kept static as in the
/// original implementation): `"semanticToText"` and `"codeToText"`.
pub fn is_supported_document_transformation(transform: &str) -> bool {
    const SUPPORTED: [&str; 2] = ["semanticToText", "codeToText"];
    SUPPORTED.iter().any(|&name| name == transform)
}
61
62/// Assert the document transformation is supported.
63pub fn assert_supported_document_transformation(transform: &str) -> Result<()> {
64    if !is_supported_document_transformation(transform) {
65        bail!("Unsupported document transformation: {transform}");
66    }
67    Ok(())
68}
69
70/// Apply a document transformation producing a new Dc array.
71/// semanticToText -> `dct_semantic_to_text`
72/// codeToText     -> `dct_code_to_text`
73pub fn apply_document_transformation(
74    transform: &DocumentTransformation,
75    dc_array_in: &[u32],
76) -> Result<Vec<u32>> {
77    match transform {
78        DocumentTransformation::SemanticToText { lang } => {
79            dct_semantic_to_text(dc_array_in, lang)
80        }
81        DocumentTransformation::CodeToText { lang } => {
82            dct_code_to_text(dc_array_in, lang)
83        }
84    }
85}
86
/// Preprocess a Dc array for an output format by running its prefilters.
///
/// All work is delegated to `settings.apply(dc_array_in)`; this function adds
/// no logic of its own and returns that call's result unchanged.
///
/// As described for the original `dcPreprocessForFormat`, the prefilters are
/// expected to be:
///  - Semantic to Text conversion if setting `prefilter_semantic` is present.
///  - Code to Text conversion if setting `prefilter_code` is present.
///
/// NOTE(review): the conditions above are carried over from the original
/// description; the actual behavior lives in `PrefilterSettings::apply`,
/// which is not visible here — confirm.
pub fn apply_prefilters(
    dc_array_in: &[u32],
    settings: &PrefilterSettings,
) -> Result<Vec<u32>> {
    settings.apply(dc_array_in)
}
97
98/// Convert semantic Dc sequence to text Dc sequence (placeholder).
99pub fn dct_semantic_to_text(
100    dc_array: &[u32],
101    _output_language: &DcOutputLanguage,
102) -> Result<Vec<u32>> {
103    // Placeholder pass-through (replace with actual logic if already implemented elsewhere).
104    Ok(dc_array.to_vec())
105}
106
107/// Convert code Dc sequence to text Dc sequence (placeholder).
108pub fn dct_code_to_text(
109    dc_array: &[u32],
110    _output_language: &DcOutputLanguage,
111) -> Result<Vec<u32>> {
112    // Placeholder pass-through (replace with actual logic if already implemented elsewhere).
113    Ok(dc_array.to_vec())
114}