1pub mod data;
2use anyhow::{Result, anyhow, ensure};
3
4use crate::formats::base64::{
5 bytes_to_standard_base64, decimal_to_standard_base64,
6 standard_base64_to_bytes, standard_base64_to_decimal,
7};
8use crate::formats::eite::dc::data::{
9 DCDATA_BIDI_CLASS_COL, DCDATA_CASING_COL, DCDATA_COMBINING_CLASS_COL,
10 DCDATA_COMPLEX_TRAITS_COL, DCDATA_DESCRIPTION_COL, DCDATA_NAME_COL,
11 DCDATA_SCRIPT_COL, DCDATA_TYPE_COL, dc_data_lookup_by_id,
12 dc_dataset_length, is_dc_dataset,
13};
14use crate::formats::eite::util::string::substring_bug_compatible;
15
/// Dc ID 207. Judging by the name, the replacement emitted when a Dc is
/// unavailable — NOTE(review): confirm against the Dc dataset.
pub const DC_REPLACEMENT_UNAVAIL_DC: u32 = 207;

/// Dc ID 206. Judging by the name, the replacement emitted when a Unicode
/// character has no Dc equivalent — NOTE(review): confirm against the Dc dataset.
pub const DC_REPLACEMENT_UNAVAIL_UNICODE: u32 = 206;

/// Dc ID 255. Presumably marks the following Dc as escaped — TODO confirm.
pub const DC_ESCAPE_NEXT: u32 = 255;

/// Dc ID 191; the same value `bytes_as_dc_encapsulated_utf8` emits before the payload.
pub const DC_START_ENCAPSULATION_UTF8: u32 = 191;
/// Dc ID 192; the same value `bytes_as_dc_encapsulated_utf8` emits after the payload.
pub const DC_END_ENCAPSULATION_UTF8: u32 = 192;

/// Dc ID 203; the same value `bytes_to_dc_encapsulated_binary` emits before the payload.
pub const DC_START_ENCAPSULATION_BINARY: u32 = 203;
/// Dc ID 204; the same value `bytes_to_dc_encapsulated_binary` emits after the payload.
pub const DC_END_ENCAPSULATION_BINARY: u32 = 204;
29
30pub fn is_known_dc(v: u32) -> bool {
33 v <= u32::try_from(maximum_known_dc())
34 .expect("Failed to convert maximum_known_dc to u32")
35}
36
37pub fn maximum_known_dc() -> usize {
38 dc_dataset_length("DcData")
40 .checked_sub(1)
41 .expect("Failed to get maximum known Dc")
42}
43
/// Reports whether `dc` is one of the Dc IDs this module treats as a newline.
///
/// The set is fixed: 119, 120, 121, 240, 294 and 295.
pub fn dc_is_newline(dc: u32) -> bool {
    const NEWLINE_DCS: [u32; 6] = [119, 120, 121, 240, 294, 295];
    NEWLINE_DCS.contains(&dc)
}
49
50pub fn dc_is_space(dc: u32) -> Result<bool> {
52 ensure!(is_known_dc(dc), "Unknown Dc {dc}");
53 Ok(dc_get_type(dc)? == "Zs")
54}
55
56pub fn dc_is_printable(dc: u32) -> Result<bool> {
59 ensure!(is_known_dc(dc), "Unknown Dc {dc}");
60 let t = dc_get_type(dc)?;
61 if t == "Zl" || t == "Zp" {
62 return Ok(false);
63 }
64 let general = t.chars().next().unwrap_or(' ');
65 if general == '!' || general == 'C' {
66 return Ok(false);
67 }
68 Ok(true)
69}
70
71pub fn dc_is_el_code(dc: u32) -> Result<bool> {
72 ensure!(is_known_dc(dc), "Unknown Dc {dc}");
73 let script = dc_get_script(dc)?;
74 Ok(script.get(0..3) == Some("EL "))
75}
76
77pub fn dc_get_el_class(dc: u32) -> Result<String> {
78 ensure!(is_known_dc(dc), "Unknown Dc {dc}");
79 let script = dc_get_script(dc)?;
80 Ok(substring_bug_compatible(&script, 3, -1))
81}
82
83pub fn dc_get_field(dc: u32, field_number: usize) -> Result<String> {
89 dc_data_lookup_by_id(
91 "DcData",
92 usize::try_from(dc).expect("Could not get usize from Dc"),
93 field_number,
94 )
95 .map_err(|e| anyhow!("dc_get_field: {e}"))
96}
97
/// Returns the `DCDATA_NAME_COL` field of `dc`, via `dc_get_field`.
///
/// # Errors
///
/// Propagates any error from `dc_get_field`.
pub fn dc_get_name(dc: u32) -> Result<String> {
    dc_get_field(dc, DCDATA_NAME_COL)
}
102
/// Returns the `DCDATA_COMBINING_CLASS_COL` field of `dc`, via `dc_get_field`.
///
/// # Errors
///
/// Propagates any error from `dc_get_field`.
pub fn dc_get_combining_class(dc: u32) -> Result<String> {
    dc_get_field(dc, DCDATA_COMBINING_CLASS_COL)
}
107
/// Returns the `DCDATA_BIDI_CLASS_COL` field of `dc`, via `dc_get_field`.
///
/// # Errors
///
/// Propagates any error from `dc_get_field`.
pub fn dc_get_bidi_class(dc: u32) -> Result<String> {
    dc_get_field(dc, DCDATA_BIDI_CLASS_COL)
}
112
/// Returns the `DCDATA_CASING_COL` field of `dc`, via `dc_get_field`.
///
/// # Errors
///
/// Propagates any error from `dc_get_field`.
pub fn dc_get_casing(dc: u32) -> Result<String> {
    dc_get_field(dc, DCDATA_CASING_COL)
}
117
/// Returns the `DCDATA_TYPE_COL` field of `dc`, via `dc_get_field`.
///
/// # Errors
///
/// Propagates any error from `dc_get_field`.
pub fn dc_get_type(dc: u32) -> Result<String> {
    dc_get_field(dc, DCDATA_TYPE_COL)
}
122
/// Returns the `DCDATA_SCRIPT_COL` field of `dc`, via `dc_get_field`.
///
/// # Errors
///
/// Propagates any error from `dc_get_field`.
pub fn dc_get_script(dc: u32) -> Result<String> {
    dc_get_field(dc, DCDATA_SCRIPT_COL)
}
127
/// Returns the `DCDATA_COMPLEX_TRAITS_COL` field of `dc`, via `dc_get_field`.
///
/// # Errors
///
/// Propagates any error from `dc_get_field`.
pub fn dc_get_complex_traits(dc: u32) -> Result<String> {
    dc_get_field(dc, DCDATA_COMPLEX_TRAITS_COL)
}
132
/// Returns the `DCDATA_DESCRIPTION_COL` field of `dc`, via `dc_get_field`.
///
/// # Errors
///
/// Propagates any error from `dc_get_field`.
pub fn dc_get_description(dc: u32) -> Result<String> {
    dc_get_field(dc, DCDATA_DESCRIPTION_COL)
}
137
/// Returns the length of the `"DcData"` dataset as reported by
/// `dc_dataset_length`.
pub fn get_dc_count() -> usize {
    dc_dataset_length("DcData")
}
142
143pub fn dc_get_column(
145 dataset: &str,
146 field_number: usize,
147) -> Result<Vec<String>> {
148 if !is_dc_dataset(dataset) {
149 return Err(anyhow!("dc_get_column: unknown dataset '{dataset}'"));
150 }
151 let len = dc_dataset_length(dataset);
152 let mut out = Vec::with_capacity(len);
153 for row in 0..len {
154 let v = dc_data_lookup_by_id(dataset, row, field_number)
155 .map_err(|e| anyhow!("dc_get_column: {e}"))?;
156 out.push(v);
157 }
158 Ok(out)
159}
160
161pub fn dc_get_mapping_to_format(dc: u32, format: &str) -> Result<String> {
170 let dataset = format!("mappings/to/{format}");
171 match dc_data_lookup_by_id(
173 &dataset,
174 usize::try_from(dc).expect("Could not get usize from Dc"),
175 1,
176 ) {
177 Ok(s) => Ok(s),
178 Err(e) => Err(anyhow!("dc_get_mapping_to_format failed: {e}")),
179 }
180}
181
/// Reports whether `dc` may appear inside an encapsulated raw sequence:
/// any Dc in `127..=190`, or Dc 195.
pub fn is_dc_base64_encapsulation_character(dc: u32) -> bool {
    matches!(dc, 127..=190 | 195)
}
185
/// Encapsulates the UTF-8 bytes of `input` by passing `input.as_bytes()` to
/// `bytes_as_dc_encapsulated_utf8`.
pub fn string_to_dc_encapsulated_utf8(input: &str) -> Vec<u32> {
    bytes_as_dc_encapsulated_utf8(input.as_bytes())
}
189
190pub fn bytes_as_dc_encapsulated_utf8(input: &[u8]) -> Vec<u32> {
191 let mut out: Vec<u32> = Vec::new();
192
193 out.push(191); out.append(&mut bytes_to_dc_encapsulated_raw(input));
195 out.push(192); out
198}
199
200pub fn bytes_to_dc_encapsulated_binary(input: &[u8]) -> Vec<u32> {
201 let mut out: Vec<u32> = Vec::new();
202
203 out.push(203); out.append(&mut bytes_to_dc_encapsulated_raw(input));
205 out.push(204); out
208}
209
210pub fn bytes_to_dc_encapsulated_raw(bytes: &[u8]) -> Vec<u32> {
211 let decimal = standard_base64_to_decimal(bytes_to_standard_base64(bytes))
212 .expect("Failed to encode base64");
213
214 let mut dc_encoded: Vec<u32> = Vec::new();
215 for b64 in decimal {
216 if b64 == 64 {
217 dc_encoded.push(195_u32);
219 } else {
220 dc_encoded.push((b64 + 127).into());
221 }
222 }
223
224 dc_encoded
225}
226
227pub fn dc_encapsulated_raw_to_bytes(input: &[u32]) -> Result<Vec<u8>> {
228 let mut out: Vec<u8> = Vec::new();
229
230 let mut dc_decoded: Vec<u8> = Vec::new();
232 for dc in input {
233 if *dc == 195 {
234 dc_decoded.push(64);
235 continue;
236 }
237 if !is_dc_base64_encapsulation_character(*dc) {
238 return Err(anyhow!(
239 "Invalid Dc {dc} in encapsulated raw sequence"
240 ));
241 }
242 dc_decoded.push(u8::try_from(dc - 127)?);
243 }
244
245 let base64 = decimal_to_standard_base64(dc_decoded)
246 .expect("Failed to translate Dcs to base64");
247
248 out.extend_from_slice(&standard_base64_to_bytes(base64)?);
249
250 Ok(out)
251}
252
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utilities::{assert_vec_u8_ok_eq, assert_vec_u32_eq};

    /// Raw Dc encapsulation of `b"Hello, world!"`, shared by the
    /// encapsulation tests below.
    const HELLO_WORLD_RAW: [u32; 20] = [
        145, 133, 148, 171, 154, 133, 187, 171, 135, 134, 156, 174, 155, 165,
        176, 163, 135, 143, 195, 195,
    ];

    /// Surrounds the shared raw payload with the given start and end Dcs.
    fn framed_hello_world(start: u32, end: u32) -> Vec<u32> {
        let mut framed = vec![start];
        framed.extend_from_slice(&HELLO_WORLD_RAW);
        framed.push(end);
        framed
    }

    #[crate::ctb_test]
    fn test_dc_newline_list() {
        [119, 120, 121, 240, 294, 295]
            .iter()
            .for_each(|&dc| assert!(dc_is_newline(dc)));
        assert!(!dc_is_newline(118));
    }

    #[crate::ctb_test]
    fn test_dc_bidi_class_120() {
        let bidi_class =
            dc_get_bidi_class(120).expect("Bidi class was incorrect");
        assert_eq!(bidi_class, "B");
    }

    #[crate::ctb_test]
    fn test_dc_is_space() {
        let dc = 18;
        assert!(is_known_dc(dc));
        assert_eq!(dc_get_type(dc).expect("Dc type was incorrect"), "Zs");
        assert!(dc_is_space(dc).expect("Dc 18 is a space"));
    }

    #[crate::ctb_test]
    fn test_format_dc_predicates() {
        let printable_21 = dc_is_printable(21)
            .unwrap_or_else(|e| panic!("Failed to run dc_is_printable(21): {e}"));
        assert!(printable_21, "Expected dc 21 printable");

        let printable_231 = dc_is_printable(231).unwrap_or_else(|e| {
            panic!("Failed to run dc_is_printable(231): {e}")
        });
        assert!(!printable_231, "Expected dc 231 NOT printable");

        assert!(
            dc_is_newline(120),
            "Expected dc 120 to be recognized as newline"
        );
    }

    #[crate::ctb_test]
    fn test_bytes_to_dc_encapsulated_raw() {
        let input = b"Hello, world!";
        let expected = HELLO_WORLD_RAW.to_vec();
        let result = bytes_to_dc_encapsulated_raw(input);
        assert_vec_u32_eq(&expected, &result);
    }

    #[crate::ctb_test]
    fn test_dc_encapsulated_raw_to_bytes() {
        let input = HELLO_WORLD_RAW.to_vec();
        let expected = b"Hello, world!";
        let result = dc_encapsulated_raw_to_bytes(&input);
        assert_vec_u8_ok_eq(expected, result);
    }

    #[crate::ctb_test]
    fn test_bytes_to_dc_encapsulated_utf8() {
        let input = "Hello, world!";
        let expected = framed_hello_world(191, 192);

        // Both entry points must produce the same framed sequence.
        assert_eq!(string_to_dc_encapsulated_utf8(input), expected);
        assert_eq!(bytes_as_dc_encapsulated_utf8(input.as_bytes()), expected);
    }

    #[crate::ctb_test]
    fn test_bytes_to_dc_encapsulated_binary() {
        let input = b"Hello, world!";
        let expected = framed_hello_world(203, 204);
        let result = bytes_to_dc_encapsulated_binary(input);
        assert_eq!(result, expected);
    }
}