ctoolbox/formats/eite/encoding/
unicode.rs

1use anyhow::{Result, anyhow};
2
3use crate::formats::FormatLog;
4use crate::formats::eite::formats::{dc_from_format, dc_to_format};
5
6/// Convert a single Unicode scalar (given as an integer) to Dc array (1 element) using 'unicode' mapping.
7/// JS: dcaFromUnicodeChar(intChar)
8pub fn dca_from_unicode_char(int_char: u32) -> Result<(Vec<u32>, FormatLog)> {
9    // In JS: dcFromFormat('unicode', anFromN(intChar)) then push first if exists.
10    // We treat int_char as a Unicode scalar, encode it to UTF-8, then call dc_from_format.
11    let ch =
12        char::from_u32(int_char).ok_or_else(|| anyhow!("Invalid codepoint"))?;
13    let mut buf = [0u8; 4];
14    let s = ch.encode_utf8(&mut buf);
15    let (dcs, log) = dc_from_format("unicode", s.as_bytes())?;
16    if dcs.is_empty() {
17        Ok((vec![], log))
18    } else {
19        Ok((vec![dcs[0]], log))
20    }
21}
22
23/// Convert a Dc to a Unicode codepoint array (1 element).
24/// JS: dcToUnicodeCharArray(intDc)
25pub fn dc_to_unicode_char_array(dc: u32) -> Result<(Vec<u32>, FormatLog)> {
26    // JS did: dcToFormat('unicode', intDc) -> returns UTF-8 bytes -> take firstCharOfUtf8String
27    let (utf8_bytes, log) = dc_to_format("unicode", dc)?;
28    if utf8_bytes.is_empty() {
29        return Ok((vec![], log));
30    }
31    // Decode first UTF-8 codepoint
32    let ch = std::str::from_utf8(&utf8_bytes)
33        .map_err(|e| anyhow!("Invalid UTF-8 from dc_to_format: {e}"))?
34        .chars()
35        .next()
36        .ok_or_else(|| anyhow!("Empty UTF-8 string from dc_to_format"))?;
37    Ok((vec![u32::from(ch)], log))
38}