1use anyhow::{Result, anyhow, bail, ensure};
4use malachite::Natural;
5use malachite::base::num::basic::traits::Zero;
6use malachite::base::num::conversion::traits::ToStringBase;
7
8use crate::formats::FormatLog;
9use crate::formats::eite::encoding::ascii::{
10 byte_from_stagel_char, stagel_char_from_byte,
11};
12
/// Zero-padding configuration for the numbers written by
/// `_format_base_string`.
///
/// `Debug`, `PartialEq` and `Eq` are derived so that settings can be logged
/// and compared in assertions.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BaseConversionPaddingMode {
    /// Minimum width in digits; shorter numbers are left-padded with `0`.
    pub pad_l: u32,
    /// When `true`, the width is taken from the number of digits needed to
    /// represent the configured `limit`. The formatter rejects this in
    /// combination with a `pad_l` greater than 1.
    pub pad_fit: bool,
}
20
21impl Default for BaseConversionPaddingMode {
22 fn default() -> Self {
23 Self {
24 pad_l: 1,
25 pad_fit: false,
26 }
27 }
28}
29
30impl BaseConversionPaddingMode {
31 pub fn none() -> Self {
32 Self::default()
33 }
34}
35
36#[derive(Clone)]
37#[allow(clippy::struct_excessive_bools)]
38pub struct BaseStringFormatSettings {
39 pub prefix: String,
41 pub separator: String,
43 pub lowercase: bool,
45 pub filter_chars: bool,
48 pub collapse_filtered: bool,
51 pub collapse_only: Vec<String>,
53 pub parse_prefixes: bool,
57 pub limit: u64,
63 pub pad: BaseConversionPaddingMode,
65}
66
67impl Default for BaseStringFormatSettings {
68 fn default() -> Self {
69 Self {
70 prefix: String::new(),
71 separator: " ".into(),
72 lowercase: false,
73 limit: 0,
74 filter_chars: true,
75 collapse_only: Vec::new(),
76 collapse_filtered: false,
77 parse_prefixes: true,
78 pad: BaseConversionPaddingMode {
79 pad_l: 1,
80 pad_fit: false,
81 },
82 }
83 }
84}
85
86pub fn int_to_base36_char(n: u8) -> Result<String> {
90 if !(0..=35).contains(&n) {
91 bail!("{n} is not within range 0..=35");
92 }
93 if n <= 9 {
94 stagel_char_from_byte(n + 48)
95 } else {
96 stagel_char_from_byte(n + 55)
97 }
98}
99
100pub fn int_from_base36_char(ch: &str) -> Result<u8> {
102 if ch.len() != 1 {
104 bail!("'{ch}' is not a single character");
105 }
106
107 let ch_uc = ch.to_ascii_uppercase();
109 let b = byte_from_stagel_char(&ch_uc)?;
110
111 let int_res = if b >= 65 {
112 if b > 90 {
113 bail!(
114 "'{ch_uc}' is not within the supported range of digits between 0 and Z (35)."
115 );
116 }
117 b - 55
118 } else {
119 if !(48..=57).contains(&b) {
120 bail!(
121 "'{ch}' is not within the supported range of digits between 0 and Z (35)."
122 );
123 }
124 b - 48
125 };
126
127 if !(0..=35).contains(&int_res) {
128 bail!("Internal error in int_from_base36_char called with n='{ch}'.");
129 }
130
131 Ok(int_res)
132}
133
134pub fn int_from_base_str_u32(s: &str, base: u8) -> Result<u32> {
136 if !is_supported_base(base) {
137 bail!("Unsupported base {base}");
138 }
139 let mut acc: u64 = 0;
140 for ch in s.chars() {
141 let d = int_from_base36_char(&ch.to_string())?;
142 if d >= base {
143 bail!("Digit {d} >= base {base}");
144 }
145 acc = acc * u64::from(base) + u64::from(d);
146 if acc > u64::from(u32::MAX) {
147 bail!("Overflow converting {s} base {base}");
148 }
149 }
150 Ok(u32::try_from(acc).expect("Did not fit in u32"))
151}
152
153pub fn int_from_base_str_u128(s: &str, base: u8) -> Result<u128> {
155 if !is_supported_base(base) {
156 bail!("Unsupported base {base}");
157 }
158 let mut acc: u128 = 0;
159 for ch in s.chars() {
160 let d = int_from_base36_char(&ch.to_string())?;
161 if d >= base {
162 bail!("Digit {d} >= base {base}");
163 }
164 acc = acc
165 .checked_mul(u128::from(base))
166 .ok_or_else(|| anyhow!("Overflow converting {s} base {base}"))?;
167 acc = acc
168 .checked_add(u128::from(d))
169 .ok_or_else(|| anyhow!("Overflow converting {s} base {base}"))?;
170 }
171 Ok(acc)
172}
173
174pub fn int_from_base_str_big(s: &str, base: u8) -> Result<Natural> {
176 if !is_supported_base(base) {
177 bail!("Unsupported base {base}");
178 }
179 let mut acc = Natural::ZERO;
180 for ch in s.chars() {
181 let d = int_from_base36_char(&ch.to_string())?;
182 if d >= base {
183 bail!("Digit {d} >= base {base}");
184 }
185 acc = acc * Natural::from(base) + Natural::from(d);
186 }
187 Ok(acc)
188}
189
190pub fn int_to_base_str(mut n: u32, base: u8) -> Result<String> {
192 if !is_supported_base(base) {
193 bail!("Unsupported base {base}");
194 }
195 if n == 0 {
196 return Ok("0".into());
197 }
198 let mut out = String::new();
199 while n > 0 {
200 let digit = n % u32::from(base);
201 out.push(
202 int_to_base36_char(digit.try_into()?)?
203 .chars()
204 .next()
205 .unwrap(),
206 );
207 n /= u32::from(base);
208 }
209 Ok(out.chars().rev().collect())
210}
211
212pub fn hex_to_dec_single(s: &str) -> Result<u32> {
213 int_from_base_str_u32(s, 16)
214}
215
216pub fn dec_to_hex_single(n: u32) -> Result<String> {
217 int_to_base_str(n, 16)
218}
219
220pub fn hex_to_dec_string(s: &str) -> Result<(String, FormatLog)> {
221 base_to_base_string(s, 16, 10, &BaseStringFormatSettings::default())
222}
223
224pub fn dec_to_hex_string(s: &str) -> Result<(String, FormatLog)> {
225 base_to_base_string(s, 10, 16, &BaseStringFormatSettings::default())
226}
227
228pub fn get_digits_needed(n: Natural, base: u8) -> Result<Natural> {
229 ensure!(is_supported_base(base), "Unsupported base {base}");
230 let mut digits = Natural::ZERO;
231 let mut value = n;
232 while value > 0 {
233 value /= Natural::from(base);
234 digits += Natural::from(1u8);
235 }
236 Ok(digits)
237}
238
239pub fn casefold_base_chars_in_string(
240 s: &str,
241 base: u8,
242 uppercase: bool,
243) -> Result<String> {
244 ensure!(is_supported_base(base), "Unsupported base {base}");
245 let mut result = String::new();
246 for c in s.chars() {
247 if is_base_digit(c.to_string().as_str(), base)? {
248 result.push(if uppercase {
249 c.to_ascii_uppercase()
250 } else {
251 c.to_ascii_lowercase()
252 });
253 } else {
254 result.push(c);
255 }
256 }
257 Ok(result)
258}
259
260pub fn format_base_string(
263 s: &str,
264 base: u8,
265 settings: &BaseStringFormatSettings,
266) -> Result<(String, FormatLog)> {
267 let parsed = _parse_base_string(
268 s,
269 base,
270 base,
271 settings.parse_prefixes,
272 settings.filter_chars,
273 settings.limit,
274 settings.collapse_filtered,
275 &settings.collapse_only,
276 )?;
277 let (out, mut log) = parsed;
278
279 let formatted = _format_base_string(out, base, settings)?;
280 log.merge(&formatted.1);
281
282 Ok((formatted.0, log))
283}
284
285pub fn base_to_base_string(
289 s: &str,
290 from_base: u8,
291 to_base: u8,
292 format_settings: &BaseStringFormatSettings,
293) -> Result<(String, FormatLog)> {
294 let converted = _parse_base_string(
295 s,
296 from_base,
297 to_base,
298 format_settings.parse_prefixes,
299 format_settings.filter_chars,
300 format_settings.limit,
301 format_settings.collapse_filtered,
302 &format_settings.collapse_only,
303 )?;
304
305 let (res, mut log) = converted;
306
307 let (formatted_res, formatted_log) =
308 _format_base_string(res, to_base, format_settings)?;
309 log.merge(&formatted_log);
310
311 Ok((formatted_res, log))
312}
313
314fn _parse_base_string(
318 s: &str,
319 from_base: u8,
320 to_base: u8,
321 parse_prefixes: bool,
322 filter_chars: bool,
323 limit: u64,
324 collapse_filtered: bool,
325 collapse_only: &Vec<String>,
326) -> Result<(Vec<String>, FormatLog)> {
327 ensure!(
328 is_supported_base(from_base),
329 "Unsupported from_base {from_base}"
330 );
331 ensure!(is_supported_base(to_base), "Unsupported to_base {to_base}");
332 let mut log = FormatLog::default();
333 let chars: Vec<char> = s.chars().collect();
334 let mut i = 0;
335 let mut in_num = false;
336 let mut num_chars: String = String::new();
337 let mut out: Vec<String> = Vec::new();
338 let max_digits = get_digits_needed(Natural::from(limit), from_base)?;
339 let max_digits: usize = usize::try_from(&max_digits).map_err(|e| {
340 anyhow!(
341 "Base conversion length of digits greater than usize, limited by String.len(): {e:?}"
342 )
343 })?;
344 let max_digits = if limit == 0 { 0 } else { max_digits };
345
346 if (limit > 1) && !(limit + 1).is_multiple_of(u64::from(from_base)) {
347 log.warn(format!("The limit was derived from the number of digits required to represent {limit}, but {limit} + 1 is not a multiple of the input base {from_base}. That's not necessarily wrong, but note that the limit is not directly the maximum number of digits, but the maximum value representable in the number of digits to limit to.").as_str());
348 }
349
350 let base_prefix_char: Option<char> = if parse_prefixes {
351 match from_base {
352 2 => Some('b'),
353 8 => Some('o'),
354 16 => Some('x'),
355 _ => None,
356 }
357 } else {
358 None
359 };
360
361 let base_prefix: Option<String> = base_prefix_char.map(|c| format!("0{c}"));
362
363 let finalize_num = |num_chars: &mut String,
364 out: &mut Vec<String>|
365 -> Result<()> {
366 if let Some(base_prefix) = &base_prefix
367 && num_chars.starts_with(base_prefix)
368 {
369 *num_chars = num_chars.trim_start_matches(base_prefix).to_string();
370 }
371 out.push(
372 int_from_base_str_big(num_chars, from_base)?
373 .to_string_base(to_base)
374 .to_uppercase(),
375 );
376 Ok(())
377 };
378 let normalize_or_push_char = |out: &mut Vec<String>, c: char| {
379 if !filter_chars {
380 out.push(c.to_string());
381 }
382 };
383 while i < chars.len() {
384 let c: char = chars[i];
385
386 let this_is_base_digit =
387 is_base_digit(c.to_string().as_str(), from_base)?;
388
389 if let Some(base_prefix_char) = base_prefix_char {
390 let potential_prefix =
391 if let Some(potential_prefix) = chars.get(i..i + 2) {
392 if potential_prefix.len() == 2 {
393 Some((potential_prefix[0], potential_prefix[1]))
394 } else {
395 None
396 }
397 } else {
398 None
399 };
400
401 let next = chars.get(i + 2);
402 let next_is_base_digit = if let Some(next) = next {
403 is_base_digit(&next.to_string(), from_base)?
404 } else {
405 false
406 };
407 if let Some(potential_prefix) = potential_prefix
408 && potential_prefix.0 == '0'
409 && potential_prefix.1 == base_prefix_char
410 && next_is_base_digit
411 {
412 if in_num {
413 finalize_num(&mut num_chars, &mut out)?;
414 in_num = false;
415 num_chars.clear();
416 }
417
418 i += 2;
419 continue;
420 }
421 }
422
423 if this_is_base_digit {
424 in_num = true;
425
426 num_chars.push(c);
427 } else {
428 let mut this_collapse_filtered = false;
429 let mut in_collapse_only = false;
430 if c != ' ' && c != ',' {
431 in_collapse_only =
433 collapse_only.iter().any(|s| s == &c.to_string());
434 this_collapse_filtered = collapse_filtered;
435 if !in_collapse_only {
436 log.import_warning(
439 i.try_into().expect("usize did not fit in u64"),
440 &format!(
441 "Unexpected character '{c}' in base {from_base}"
442 ),
443 );
444 }
445 }
446 if !this_collapse_filtered && !in_collapse_only {
447 if in_num {
448 finalize_num(&mut num_chars, &mut out)?;
449 in_num = false;
450 num_chars.clear();
451 }
452
453 normalize_or_push_char(&mut out, c);
454 }
455 }
456
457 if in_num && (max_digits > 0) && (num_chars.len() == max_digits) {
458 finalize_num(&mut num_chars, &mut out)?;
459 in_num = false;
460 num_chars.clear();
461 }
462
463 i += 1;
464 }
465
466 if in_num && !num_chars.is_empty() {
467 finalize_num(&mut num_chars, &mut out)?;
468 }
469
470 Ok((out, log))
471}
472
473pub fn _format_base_string(
474 tokens: Vec<String>,
475 base: u8,
476 settings: &BaseStringFormatSettings,
477) -> Result<(String, FormatLog)> {
478 let mut log: FormatLog = FormatLog::default();
479
480 let pad = &settings.pad;
481 let limit = settings.limit;
482 let num_prefix = &settings.prefix;
483
484 let padded_width: u32 = if pad.pad_fit {
485 let max_digits = get_digits_needed(Natural::from(limit), base)?;
486 u32::try_from(&max_digits)
487 .map_err(|e| anyhow!("Padding to more than 32 bits of digits is not supported just because it seems unnecessary, but could be increased: {e:?}"))?
488 } else {
489 pad.pad_l
490 };
491 if (pad.pad_fit) && (limit == 0) {
492 log.import_error(
493 0,
494 "Padding to fit limit was requested, but no limit was set.",
495 );
496 bail!("Incompatible padding and limit settings");
497 }
498 if (pad.pad_fit) && (limit == 1) {
499 log.import_warning(0, "Padding to fit limit was requested, but limit was set to 1. 0 is always shown as 0 anyway, so the padding option will do nothing.");
500 }
501 if (pad.pad_fit) && (pad.pad_l > 1) {
502 log.import_error(0, "Padding to fit limit was requested, but a separate padding width was also requested. Please set one or the other.");
507 bail!("Multiple padding configurations given");
508 }
509
510 let padded_width = std::cmp::max(pad.pad_l, padded_width);
511
512 let mut out: String = String::new();
513 for (index, token) in tokens.iter().enumerate() {
514 let formatted = if is_base_str(token, base)? {
515 let separator = if index < tokens.len() - 1 {
516 &settings.separator
517 } else {
518 ""
519 };
520 format!(
521 "{num_prefix}{:0>width$}{}",
522 token,
523 separator,
524 width = usize::try_from(padded_width)
525 .expect("u32 did not fit in usize")
526 )
527 } else {
528 token.clone()
529 };
530 out.push_str(&formatted);
531 }
532
533 Ok((
534 if settings.lowercase {
535 casefold_base_chars_in_string(&out, base, false)?
536 } else {
537 casefold_base_chars_in_string(&out, base, true)?
538 },
539 log,
540 ))
541}
542
/// Returns whether `base` is a radix the conversions in this module can
/// handle, i.e. `2..=36`.
///
/// Base 1 is rejected: repeatedly dividing by 1 never reaches zero, so the
/// digit-producing loops in this module would not terminate, and there is
/// no positional notation with a single digit value.
pub fn is_supported_base(base: u8) -> bool {
    (2..=36).contains(&base)
}
546
547pub fn is_base_digit(ch: &str, base: u8) -> Result<bool> {
548 if ch.chars().count() != 1 {
549 bail!("Invalid digit");
550 }
551 if !is_supported_base(base) {
552 bail!("Unsupported base {base}");
553 }
554 let v = int_from_base36_char(ch);
555 if v.is_err() {
556 return Ok(false);
557 }
558 Ok(v.unwrap() < base)
559}
560
561pub fn is_base_str(s: &str, base: u8) -> Result<bool> {
562 if !is_supported_base(base) {
563 bail!("Unsupported base {base}");
564 }
565 for ch in s.chars() {
566 if !is_base_digit(&ch.to_string(), base)? {
567 return Ok(false);
568 }
569 }
570 Ok(true)
571}
572
573pub fn char_from_hex_byte(hex: &str) -> Result<char> {
576 if hex.len() != 2 {
577 return Err(anyhow!("Expected 2 hex digits, got {}", hex.len()));
578 }
579 let v = int_from_base_str_u32(hex, 16)?;
580 if v > 0xFF {
581 return Err(anyhow!("Hex byte out of range"));
582 }
583 Ok(char::from_u32(v).unwrap())
584}
585
#[cfg(test)]
mod tests {
    use crate::formats::{
        assert_string_ok_eq_no_errors, assert_string_ok_eq_no_warnings,
    };

    use super::*;

    #[crate::ctb_test]
    fn test_base36_digit_roundtrip() {
        // Every digit value must survive value -> char -> value.
        for n in 0..=35 {
            let ch = int_to_base36_char(n).unwrap();
            let v = int_from_base36_char(&ch).unwrap();
            assert_eq!(n, v);
        }
        assert!(int_to_base36_char(36).is_err());
    }

    #[crate::ctb_test]
    fn test_hex_conversion_examples() {
        let hex = dec_to_hex_single(9917).unwrap();
        assert_eq!(hex, "26BD");
        let dec = hex_to_dec_single("26BD").unwrap();
        assert_eq!(dec, 9917);
    }

    #[crate::ctb_test]
    fn test_char_from_hex_byte() {
        assert_eq!(char_from_hex_byte("41").unwrap(), 'A');
        assert_eq!(char_from_hex_byte("7f").unwrap(), '\u{007F}');
        assert!(char_from_hex_byte("XYZ").is_err());
    }

    #[crate::ctb_test]
    fn test_int_from_base_str_variants() {
        // Largest value that still fits, and the first one that does not.
        assert_eq!(int_from_base_str_u32("FFFFFFFF", 16).unwrap(), u32::MAX);
        assert!(int_from_base_str_u32("100000000", 16).is_err());
        // A digit that is not valid in the requested base.
        assert!(int_from_base_str_u32("12", 2).is_err());
        // Bases above 36 are not supported.
        assert!(int_from_base_str_u32("1", 37).is_err());
        // Digits are case-insensitive.
        assert_eq!(int_from_base_str_u128("zz", 36).unwrap(), 1295);
        assert_eq!(
            int_from_base_str_big("ff", 16).unwrap(),
            Natural::from(255u32)
        );
    }

    #[crate::ctb_test]
    fn test_digit_helpers() {
        assert_eq!(
            get_digits_needed(Natural::from(255u32), 16).unwrap(),
            Natural::from(2u32)
        );
        assert_eq!(
            get_digits_needed(Natural::from(256u32), 16).unwrap(),
            Natural::from(3u32)
        );
        assert_eq!(
            get_digits_needed(Natural::from(0u32), 10).unwrap(),
            Natural::from(0u32)
        );

        assert!(is_base_digit("f", 16).unwrap());
        assert!(!is_base_digit("g", 16).unwrap());
        assert!(is_base_digit("ab", 16).is_err());

        assert!(is_base_str("1aF", 16).unwrap());
        assert!(!is_base_str("1aG", 16).unwrap());

        // Only characters that are digits of the base change case.
        assert_eq!(
            casefold_base_chars_in_string("0xAbG", 16, false).unwrap(),
            "0xabG"
        );

        assert_eq!(int_to_base_str(0, 2).unwrap(), "0");
        assert_eq!(int_to_base_str(255, 2).unwrap(), "11111111");
    }

    #[crate::ctb_test]
    fn test_base_to_base_string() {
        let format_settings = BaseStringFormatSettings::default();
        assert_string_ok_eq_no_warnings(
            "26",
            base_to_base_string("1A", 16, 10, &format_settings),
        );

        // Prefixes are skipped and separators are normalised.
        let (_result, _log) = assert_string_ok_eq_no_warnings(
            "26 16 4",
            base_to_base_string("0x1A, 0x10, 0x04", 16, 10, &format_settings),
        );

        // Without prefix parsing or filtering, "0" and "x" are handled as a
        // number and a stray character respectively.
        let (_result, log) = assert_string_ok_eq_no_errors(
            "0x26, 0x16, 0x4",
            base_to_base_string(
                "0x1A, 0x10, 0x04",
                16,
                10,
                &BaseStringFormatSettings {
                    separator: "".to_string(),
                    filter_chars: false,
                    parse_prefixes: false,
                    ..Default::default()
                },
            ),
        );
        assert!(log.has_warnings());

        assert_string_ok_eq_no_warnings(
            "12",
            base_to_base_string("10", 10, 8, &format_settings),
        );

        // Unexpected characters are kept (with warnings) when not filtered.
        let (_result, log) = assert_string_ok_eq_no_errors(
            "26,uuuu 4F,é 16, 4",
            base_to_base_string(
                "26,uuuu 4F,é 16, 0x04",
                16,
                16,
                &BaseStringFormatSettings {
                    separator: "".to_string(),
                    filter_chars: false,
                    ..Default::default()
                },
            ),
        );
        assert!(log.has_warnings());

        // Explicit padding width, prefix, separator and lowercase output.
        assert_string_ok_eq_no_warnings(
            "0x026!0x04f!0x016!0x004",
            base_to_base_string(
                "26, 4F, 16, 0x04",
                16,
                16,
                &BaseStringFormatSettings {
                    prefix: "0x".to_string(),
                    separator: "!".to_string(),
                    lowercase: true,
                    pad: BaseConversionPaddingMode {
                        pad_l: 3,
                        pad_fit: false,
                    },
                    ..Default::default()
                },
            ),
        );

        // Padding derived from the limit (two hex digits for 255).
        assert_string_ok_eq_no_warnings(
            "0x26!0x4F!0x16!0x04",
            base_to_base_string(
                "26, 4F, 16, 0x04",
                16,
                16,
                &BaseStringFormatSettings {
                    prefix: "0x".to_string(),
                    separator: "!".to_string(),
                    lowercase: false,
                    limit: u64::from(u8::MAX),
                    pad: BaseConversionPaddingMode {
                        pad_l: 0,
                        pad_fit: true,
                    },
                    ..Default::default()
                },
            ),
        );

        // "F" is not a decimal digit, so it is dropped with a warning.
        let (_result, log) = assert_string_ok_eq_no_errors(
            "26 4 16",
            format_base_string(
                "26, 4F, 16",
                10,
                &BaseStringFormatSettings::default(),
            ),
        );
        assert!(log.has_warnings());

        // A limit of 255 splits numbers after two hex digits.
        assert_string_ok_eq_no_warnings(
            "26, 4F, 16, F, 0",
            format_base_string(
                "0x26, 4f, 16f, 0",
                16,
                &BaseStringFormatSettings {
                    separator: ", ".to_string(),
                    limit: 255,
                    ..Default::default()
                },
            ),
        );

        // A limit of 1 splits after every digit.
        assert_string_ok_eq_no_warnings(
            "2 6 4 F 1 6",
            format_base_string(
                "0x26, 4f, 16",
                16,
                &BaseStringFormatSettings {
                    limit: 1,
                    ..Default::default()
                },
            ),
        );
    }

    #[crate::ctb_test]
    fn test_format_base_string() {
        let (_result, log) = assert_string_ok_eq_no_errors(
            "26 0 4F 0 16F",
            format_base_string(
                "26, 0n4F, 0x16fZz",
                16,
                &BaseStringFormatSettings {
                    parse_prefixes: false,
                    ..Default::default()
                },
            ),
        );
        assert!(log.has_warnings());

        let (_result, log) = assert_string_ok_eq_no_errors(
            "0x26!, 0x0!n0x4f!, 0x0!x0x16f!Zz",
            format_base_string(
                "26, 0n4F, 0x16fZz",
                16,
                &BaseStringFormatSettings {
                    prefix: "0x".to_string(),
                    separator: "!".to_string(),
                    lowercase: true,
                    parse_prefixes: false,
                    filter_chars: false,
                    ..Default::default()
                },
            ),
        );
        assert!(log.has_warnings());
    }

    #[crate::ctb_test]
    fn test_collapse_filtered() {
        let settings = BaseStringFormatSettings {
            collapse_filtered: true,
            ..Default::default()
        };
        let res = base_to_base_string("10_0!00", 10, 10, &settings);
        let (res, log) = res.expect("Error");
        assert_eq!("10000", res);
        assert!(log.has_warnings());
    }

    #[crate::ctb_test]
    fn test_collapse_only() {
        let settings = BaseStringFormatSettings {
            collapse_only: vec!["_".to_string()],
            filter_chars: true,
            ..Default::default()
        };
        // Listed characters are removed silently.
        assert_string_ok_eq_no_warnings(
            "10000",
            base_to_base_string("10_000", 10, 10, &settings),
        );
        // Characters that are not listed still produce a warning.
        let conv = base_to_base_string("10_000!", 10, 10, &settings);
        assert!(conv.is_ok());
        let (conv, log) = conv.expect("checked");
        assert_eq!("10000", conv);
        assert!(log.has_warnings());
    }
}