1use anyhow::{Result, anyhow, bail};
22
23use crate::formats::base16b;
24use crate::formats::eite::encoding::pack32::{
25 is_pack32_char, pack32, unpack32,
26};
27use crate::formats::eite::util::array::subset;
28use crate::formats::eite::util::bitwise::{
29 byte_array_from_int_bit_array, byte_array_to_int_bit_array,
30};
31use crate::formats::eite::util::math::int_is_between_u32;
32use crate::{bail_if_none, log};
33
/// Marker bytes placed in front of an armored Base17b UTF-8 payload.
///
/// The value is 32 zero bytes.
// NOTE(review): the name says "UUID" but the array holds 32 bytes, and the
// value is identical to `ARMORED_BASE17B_UTF8_END_UUID_BYTES`; this looks
// like a placeholder -- confirm the intended marker value.
pub const ARMORED_BASE17B_UTF8_START_UUID_BYTES: [u8; 32] = [0x00; 32];
40
/// Marker bytes appended after an armored Base17b UTF-8 payload.
///
/// The value is 32 zero bytes.
// NOTE(review): the name says "UUID" but the array holds 32 bytes, and the
// value is identical to `ARMORED_BASE17B_UTF8_START_UUID_BYTES`; this looks
// like a placeholder -- confirm the intended marker value.
pub const ARMORED_BASE17B_UTF8_END_UUID_BYTES: [u8; 32] = [0x00; 32];
47
48pub fn int_bit_array_to_basenb_no_remainder_marker(
51 base: u32,
52 input: &[u8],
53) -> Result<Vec<u8>> {
54 let encoded = base16b::encode(input, base)?;
55
56 Ok(encoded.into_bytes())
57}
58
59pub fn int_bit_array_from_basenb_string(
62 input_bytes: &[u8],
63 remainder_len: Option<u32>,
64) -> Result<Vec<u8>> {
65 let s = std::str::from_utf8(input_bytes).map_err(|e| {
66 anyhow!(
67 "utf8 error on input {:?}, from_lossy {:?}: {e}",
68 input_bytes,
69 String::from_utf8_lossy(input_bytes)
70 )
71 })?;
72 log!(
73 "Decoding with input str {:?}, remainder {:?}",
74 s,
75 remainder_len
76 );
77 base16b::decode(s, remainder_len)
78}
79
/// Returns `true` when `base` is one of the supported Basenb bases, i.e. any
/// value from 7 through 17 inclusive.
pub fn is_basenb_base(base: u32) -> bool {
    matches!(base, 7..=17)
}
84
85pub fn is_basenb_char(packed_char: &[u8]) -> bool {
94 if !is_pack32_char(packed_char) {
95 return false;
96 }
97 if let Ok(cp) = unpack32(packed_char) {
98 if int_is_between_u32(cp, 983_040, 1_048_573) {
100 return true;
101 }
102 if int_is_between_u32(cp, 1_048_576, 1_114_109) {
104 return true;
105 }
106 if int_is_between_u32(cp, 63_481, 63_501) {
109 return true;
110 }
111 }
112 false
113}
114
115pub fn is_basenb_distinct_remainder_char(packed_char: &[u8]) -> bool {
118 if !is_pack32_char(packed_char) {
119 return false;
120 }
121 if let Ok(cp) = unpack32(packed_char) {
122 return int_is_between_u32(cp, 63_481, 63_497);
124 }
125 false
126}
127
128pub fn byte_array_to_basenb_no_remainder_marker(
129 base: u32,
130 input: &[u8],
131) -> Result<Vec<u8>> {
132 if !is_basenb_base(base) {
133 return Err(anyhow!(
134 "byte_array_to_basenb_no_remainder_marker: invalid base {base}, expected 7..=17"
135 ));
136 }
137 let bit_array = byte_array_to_int_bit_array(input);
138 let encoded =
139 int_bit_array_to_basenb_no_remainder_marker(base, &bit_array)?;
140 Ok(encoded)
141}
142
143pub fn byte_array_to_basenb_utf8(base: u32, input: &[u8]) -> Result<Vec<u8>> {
154 let mut encoded = byte_array_to_basenb_no_remainder_marker(base, input)?;
155 let remainder = (input.len() * 8) % usize::try_from(base)?;
157 let codepoint = 63_497 - (u32::try_from(remainder)?);
159 encoded.extend(pack32(codepoint)?);
160 Ok(encoded)
161}
162
/// Sentinel value that `byte_array_from_basenb_utf8` returns (as an `Ok`
/// value) when its input holds nothing besides the remainder marker.
///
/// Read as a UUID, the 16 bytes spell
/// `3362daa3-1705-40ec-9a97-59d052fd4037`.
pub const BYTE_ARRAY_FROM_BASENB_UTF8_INVALID_INPUT_EXCEPTION_BYTES: [u8; 16] = [
    0x33, 0x62, 0xda, 0xa3, 0x17, 0x05, 0x40, 0xec, 0x9a, 0x97, 0x59, 0xd0,
    0x52, 0xfd, 0x40, 0x37,
];
169
170pub fn byte_array_from_basenb_utf8(input: &[u8]) -> Result<Vec<u8>> {
190 let mut remainder: u32;
192 let mut remainder_arr: Vec<u8>;
194 remainder_arr = bail_if_none!(subset(input, -3, -1));
195 if is_basenb_distinct_remainder_char(&remainder_arr) {
196 remainder = unpack32(&remainder_arr)?;
197 remainder = 63497_u32.checked_sub(remainder).unwrap();
198 } else {
199 remainder_arr = bail_if_none!(subset(input, -4, -1));
201 let remainder_decoded: Vec<u8> = byte_array_from_int_bit_array(
202 &int_bit_array_from_basenb_string(&remainder_arr, Some(8))?,
203 )?;
204 let temp: &u8 = bail_if_none!(remainder_decoded.first());
205 let temp: i16 = bail_if_none!(i16::from(*temp).checked_add(-2));
206 remainder = u32::try_from(temp)?;
207 }
208 if input.len() <= remainder_arr.len() {
209 return Ok(
211 BYTE_ARRAY_FROM_BASENB_UTF8_INVALID_INPUT_EXCEPTION_BYTES.to_vec()
212 );
213 }
214 let mut subset_end = i64::try_from(remainder_arr.len())?;
215 subset_end *= -1;
216 subset_end += -1;
217
218 let subset = bail_if_none!(subset(input, 0, subset_end));
219
220 log!("Getting bits from subset: {:?}, {:?}", &subset, remainder);
221
222 let bits = &int_bit_array_from_basenb_string(&subset, Some(remainder))?;
223 log!("Bits from decoder: {:?}", &bits);
224 byte_array_from_int_bit_array(bits)
225}
226
227pub fn byte_array_to_basenb_17_utf8(input: &[u8]) -> Result<Vec<u8>> {
233 byte_array_to_basenb_utf8(17, input)
234}
235
236pub fn byte_array_from_basenb_17_utf8(input: &[u8]) -> Result<Vec<u8>> {
240 byte_array_from_basenb_utf8(input)
241}
242
243pub fn byte_array_to_armored_base17b_utf8(input: &[u8]) -> Result<Vec<u8>> {
251 let encoded = byte_array_to_basenb_17_utf8(input)?;
253 let mut out = ARMORED_BASE17B_UTF8_START_UUID_BYTES.to_vec();
254 out.extend(encoded);
255 out.extend(ARMORED_BASE17B_UTF8_END_UUID_BYTES);
256 Ok(out)
257}
258
259pub fn byte_array_from_armored_base17b_utf8(input: &[u8]) -> Result<Vec<u8>> {
261 let start = ARMORED_BASE17B_UTF8_START_UUID_BYTES;
262 let end = ARMORED_BASE17B_UTF8_END_UUID_BYTES;
263
264 let min_len = start.len() + end.len();
265 if input.len() < min_len {
266 bail!(
267 "Armored Base17b input too short: {} < required framing {}",
268 input.len(),
269 min_len
270 );
271 }
272
273 if !input.starts_with(&start) {
274 bail!("Armored Base17b input missing or corrupt start UUID marker");
275 }
276 if !input.ends_with(&end) {
277 bail!("Armored Base17b input missing or corrupt end UUID marker");
278 }
279
280 let inner_len = input.len() - start.len() - end.len();
281 let inner = &input[start.len()..start.len() + inner_len];
282 let decoded = byte_array_from_basenb_17_utf8(inner)?;
284 Ok(decoded)
285}
286
#[cfg(test)]
mod tests {
    //! Tests for base validation, the character ranges, the remainder
    //! marker, Basenb round trips and the armored framing.

    use crate::formats::eite::formats::dcbasenb::DC_BASENB_EMBEDDED_START_BYTES;
    use crate::formats::eite::{
        encoding::pack32::pack32, util::bitwise::byte_array_to_int_bit_array,
    };
    use crate::utilities::{assert_vec_u8_eq, assert_vec_u8_ok_eq};

    use super::*;

    /// Only bases 7 through 17 are accepted.
    #[crate::ctb_test]
    fn test_is_basenb_base() {
        for b in 0..=30 {
            assert_eq!(is_basenb_base(b), (7..=17).contains(&b));
        }
    }

    /// The distinct remainder markers are exactly 63_481..=63_497.
    #[crate::ctb_test]
    fn test_basenb_remainder_marker_range() {
        for cp in 63_480..=63_500 {
            let packed = pack32(cp).unwrap();
            let in_range = (63_481..=63_497).contains(&cp);
            assert_eq!(is_basenb_distinct_remainder_char(&packed), in_range);
        }
    }

    /// Spot-checks code points on and around the edges of accepted ranges.
    #[crate::ctb_test]
    fn test_basenb_char_ranges() {
        let probes = [
            63_480, 63_481, 63_495, 63_501, 63_502, 983_040, 983_100,
            1_048_573, 1_048_574,
        ];
        for cp in probes {
            let packed = pack32(cp).unwrap();
            let is_char = is_basenb_char(&packed);
            let expected = (63_481..=63_501).contains(&cp)
                || (983_040..=1_048_573).contains(&cp)
                || (1_048_576..=1_114_109).contains(&cp);
            assert_eq!(
                is_char, expected,
                "cp={} expected {} got {}",
                cp, expected, is_char
            );
        }
    }

    /// The marker appended by the encoder is `63_497 - (bit count % base)`,
    /// and the encoded text decodes back to the input.
    #[crate::ctb_test]
    fn test_byte_array_to_basenb_utf8_remainder_marker() {
        let data = b"\xAB\xCD";
        let bits = byte_array_to_int_bit_array(data);
        assert_eq!(bits.len(), 16);

        let base = 10;
        let with_marker = byte_array_to_basenb_utf8(base, data).unwrap();
        let without_marker =
            byte_array_to_basenb_no_remainder_marker(base, data).unwrap();
        assert!(
            with_marker.len() >= 4,
            "Expected at least one codepoint + remainder marker"
        );

        // Whatever the marked encoding adds at the end is the marker.
        let marker_len = with_marker.len() - without_marker.len();
        let marker = &with_marker[with_marker.len() - marker_len..];
        assert!(is_basenb_distinct_remainder_char(marker));

        let bit_count =
            u32::try_from(bits.len()).expect("Could not fit length in u32");
        assert_eq!(unpack32(marker).unwrap(), 63_497 - (bit_count % base));

        assert_vec_u8_ok_eq(data, byte_array_from_basenb_utf8(&with_marker));
    }

    /// Bases just outside 7..=17 are rejected.
    #[crate::ctb_test]
    fn test_byte_array_to_basenb_utf8_invalid_base() {
        let data = b"abc";
        for base in [6, 18] {
            assert!(byte_array_to_basenb_utf8(base, data).is_err());
        }
    }

    /// True when `bytes` equals the decoder's invalid-input sentinel.
    fn is_sentinel(bytes: &[u8]) -> bool {
        bytes == BYTE_ARRAY_FROM_BASENB_UTF8_INVALID_INPUT_EXCEPTION_BYTES
    }

    /// Empty input encodes to two characters and decodes back to nothing.
    #[crate::ctb_test]
    fn test_decode_empty_input() {
        let encoded = assert_vec_u8_ok_eq(
            "\u{f80d}\u{f809}".as_bytes(),
            byte_array_to_basenb_utf8(17, &[]),
        );
        assert_vec_u8_ok_eq(&[], byte_array_from_basenb_utf8(&encoded));
    }

    /// A lone remainder marker decodes to the sentinel.
    #[crate::ctb_test]
    fn test_decode_invalid_only_remainder() {
        let marker_only = "\u{f809}".as_bytes();
        let dec = byte_array_from_basenb_utf8(marker_only).unwrap();
        assert!(
            is_sentinel(&dec),
            "Expected sentinel for empty input decode; got {:?}",
            dec
        );
    }

    /// Base-17 round trips over a handful of short inputs.
    #[crate::ctb_test]
    fn test_round_trip_base17_small_samples() {
        let samples: Vec<Vec<u8>> = vec![
            vec![0u8],
            vec![1, 2, 3],
            vec![255],
            b"hello".to_vec(),
            b"\x00\x01\x02\x03\xFE\xFF".to_vec(),
            (0u8..32u8).collect(),
        ];

        for sample in samples {
            let enc = byte_array_to_basenb_17_utf8(&sample).unwrap();
            crate::log!(
                "Sample {:?} encoded to Basenb 17 UTF-8 bytes: {:?}",
                &sample,
                &enc
            );
            let dec = byte_array_from_basenb_17_utf8(&enc).unwrap();
            assert!(
                !is_sentinel(&dec) || sample.is_empty(),
                "Unexpected sentinel for non-empty sample {:?} (encoded {:?})",
                sample,
                enc
            );
            if !sample.is_empty() {
                assert_vec_u8_eq(&sample, &dec);
            }
        }
    }

    /// A 16-byte input encodes to `DC_BASENB_EMBEDDED_START_BYTES` followed
    /// by the U+F800 marker, and decodes back to itself.
    #[crate::ctb_test]
    fn test_basenb_encode_uuid() {
        let input: Vec<u8> = vec![
            0xe8, 0x2e, 0xef, 0x60, 0x19, 0xbc, 0x4a, 0x00, 0xa4, 0x4a, 0x76,
            0x3a, 0x34, 0x45, 0xc1, 0x6f,
        ];

        let mut expected = DC_BASENB_EMBEDDED_START_BYTES.to_vec();
        expected.extend_from_slice("\u{F800}".as_bytes());

        let enc = assert_vec_u8_ok_eq(
            &expected,
            byte_array_to_basenb_17_utf8(&input),
        );

        let dec = byte_array_from_basenb_17_utf8(&enc).unwrap();
        assert!(
            !is_sentinel(&dec) || input.is_empty(),
            "Unexpected exception UUID for {:?} (encoded {:?})",
            input,
            enc
        );

        assert_vec_u8_eq(&input, &dec);
    }

    /// Armoring adds both markers and un-armoring restores the payload.
    #[crate::ctb_test]
    fn test_armored_round_trip() {
        let payload = b"Hello Base17b Armored!";
        let armored = byte_array_to_armored_base17b_utf8(payload).unwrap();
        assert!(armored.starts_with(&ARMORED_BASE17B_UTF8_START_UUID_BYTES));
        assert!(armored.ends_with(&ARMORED_BASE17B_UTF8_END_UUID_BYTES));

        assert_vec_u8_ok_eq(
            payload,
            byte_array_from_armored_base17b_utf8(&armored),
        );
    }

    /// Corrupting the first byte is reported as a bad start marker.
    #[crate::ctb_test]
    fn test_armored_invalid_prefix() {
        let mut armored = byte_array_to_armored_base17b_utf8(b"xyz").unwrap();
        armored[0] ^= 0xFF;
        let err = byte_array_from_armored_base17b_utf8(&armored).unwrap_err();
        assert!(
            err.to_string().contains("missing or corrupt start UUID"),
            "Unexpected error: {err}"
        );
    }

    /// Corrupting the last byte is reported as a bad end marker.
    #[crate::ctb_test]
    fn test_armored_invalid_suffix() {
        let mut armored = byte_array_to_armored_base17b_utf8(b"xyz").unwrap();
        let last_index = armored.len() - 1;
        armored[last_index] ^= 0xAA;
        let err = byte_array_from_armored_base17b_utf8(&armored).unwrap_err();
        assert!(
            err.to_string().contains("missing or corrupt end UUID"),
            "Unexpected error: {err}"
        );
    }

    /// Input shorter than the combined framing is rejected up front.
    #[crate::ctb_test]
    fn test_armored_too_short() {
        let data: Vec<u8> = vec![1, 2, 3, 4];
        let err = byte_array_from_armored_base17b_utf8(&data).unwrap_err();
        assert!(
            err.to_string().contains("too short"),
            "Unexpected error: {err}"
        );
    }
}