ctoolbox/formats/eite/util/
string.rs

// ---------------
// String char access / mutation
// ---------------
4
5use anyhow::{Result, anyhow, bail};
6
7use crate::formats::eite::{
8    encoding::ascii::stagel_char_from_byte, util::ascii::ascii_is_digit,
9};
10
/// Returns the character at char position `index` of `s` as an owned string.
///
/// Positions count `char`s (Unicode scalar values), not bytes. When `index`
/// is past the last character, an empty string is returned.
pub fn str_char(s: &str, index: usize) -> String {
    match s.chars().nth(index) {
        Some(found) => String::from(found),
        None => String::new(),
    }
}
17pub fn str_char_at_pos(s: &str, index: usize) -> String {
18    str_char(s, index)
19}
20pub fn char_at_pos(s: &str, index: usize) -> String {
21    str_char(s, index)
22}
23pub fn char_at(s: &str, index: usize) -> String {
24    str_char(s, index)
25}
26
/// Returns a copy of `s` with the character at char position `index`
/// replaced by `ch`.
///
/// An `index` past the end leaves the string unchanged.
///
/// # Panics
///
/// Panics if `ch` does not contain exactly one character.
pub fn set_char_at(s: &str, index: usize, ch: &str) -> String {
    let mut candidates = ch.chars();
    let (Some(replacement), None) = (candidates.next(), candidates.next()) else {
        panic!("Replacement must be single char");
    };

    let mut out = String::with_capacity(s.len());
    for (pos, original) in s.chars().enumerate() {
        out.push(if pos == index { replacement } else { original });
    }
    out
}
40
/// Returns `s` with its characters (`char`s) in reverse order.
pub fn reverse_str(s: &str) -> String {
    let mut reversed = String::with_capacity(s.len());
    reversed.extend(s.chars().rev());
    reversed
}
44
/// Upper-cases a single-character string using ASCII rules only; non-ASCII
/// characters are returned unchanged.
///
/// # Panics
///
/// Panics if `ch` does not contain exactly one character.
pub fn char_to_upper(ch: &str) -> String {
    assert!(ch.chars().count() == 1);
    ch.chars().map(|c| c.to_ascii_uppercase()).collect()
}
49
/// Upper-cases every ASCII letter in `s`; all other characters are copied
/// through unchanged.
pub fn str_to_upper(s: &str) -> String {
    s.to_ascii_uppercase()
}
53
/// Lower-cases a single-character string using ASCII rules only; non-ASCII
/// characters are returned unchanged.
///
/// # Panics
///
/// Panics if `ch` does not contain exactly one character.
pub fn char_to_lower(ch: &str) -> String {
    assert!(ch.chars().count() == 1);
    ch.chars().map(|c| c.to_ascii_lowercase()).collect()
}
58
/// Lower-cases every ASCII letter in `s`; all other characters are copied
/// through unchanged.
pub fn str_to_lower(s: &str) -> String {
    s.to_ascii_lowercase()
}
62
/// Returns `true` when `s` has no content at all (zero bytes).
pub fn str_empty(s: &str) -> bool {
    s.is_empty()
}
/// Returns `true` when `s` contains at least one byte of content.
pub fn str_nonempty(s: &str) -> bool {
    !s.is_empty()
}
69
/// Returns `true` when every character of `s` is an ASCII digit (`0`-`9`).
///
/// The empty string is accepted (there is no offending character), and
/// signs, whitespace and non-ASCII digits are all rejected.
pub fn str_contains_only_int(s: &str) -> bool {
    // Any non-ASCII character is encoded with bytes >= 0x80, none of which is
    // an ASCII digit, so a byte scan gives the same answer as a char scan.
    s.bytes().all(|byte| byte.is_ascii_digit())
}
76
/// Returns up to `length` characters of `s`, starting at char position
/// `start`.
///
/// All positions and lengths are measured in `char`s, not bytes.
///
/// A negative `length` replicates an off-by-one from the original StageL
/// implementation: the number of characters taken is
/// `char_count(s) + 1 + length`, clamped at zero, and it is *not* reduced by
/// `start`. So `-1` means "through the end of the string" and
/// `substring_bug_compatible("test", 0, -2)` returns `"tes"`.
///
/// A `start` beyond the end of the string, or a request for more characters
/// than are available, yields a shorter (possibly empty) result; this
/// function does not panic.
pub fn substring_bug_compatible(
    s: &str,
    start: usize,
    length: isize,
) -> String {
    let take = if length < 0 {
        // Count chars rather than bytes so the budget is in the same unit as
        // the `skip`/`take` below; `saturating_sub` clamps at zero.
        (s.chars().count() + 1).saturating_sub(length.unsigned_abs())
    } else {
        length.unsigned_abs()
    };
    s.chars().skip(start).take(take).collect()
}
105
/// Returns the length of `s` in bytes of its UTF-8 encoding (the same value
/// as `str::len`), which equals the character count only for ASCII text.
pub fn len_str(s: &str) -> usize {
    s.len()
}
109
/// Replaces the first occurrence of `find` in `haystack` with `replace`.
///
/// Mirrors the first-occurrence behaviour of the JS `String.replace` used by
/// the original `strReplace(str, find, replace)`:
/// - an empty `find` matches at position 0, so `replace` is prepended;
/// - when `find` does not occur, `haystack` is returned unchanged.
pub fn str_replace_once(haystack: &str, find: &str, replace: &str) -> String {
    // `replacen` with a count of 1 stops after the leftmost match; an empty
    // pattern matches before the first character.
    haystack.replacen(find, replace, 1)
}
127
// ---------------
// Percentage formatting (3 decimals)
// ---------------
131
132pub fn format_percentage(a: i32, b: i32) -> Result<String> {
133    if b == 0 {
134        bail!("Division by zero in percentage");
135    }
136    if a == 0 {
137        return Ok("0.000".into());
138    }
139    let pct = (f64::from(a) / f64::from(b)) * 100.0;
140    Ok(format!("{pct:.3}"))
141}
142
/// Basic (non-escaped) split replicating the original StageL strSplit.
///
/// Behaves like Rust's `str::split` with a string pattern, with two
/// exceptions:
/// - an empty `input` yields an empty vector (not a single empty element);
/// - an empty `separator` splits `input` into its individual characters.
///
/// Consecutive separators are not collapsed, and a leading or trailing
/// separator produces a leading or trailing empty element. Separators are
/// matched left to right without overlapping.
pub fn str_split(input: &str, separator: &str) -> Vec<String> {
    if input.is_empty() {
        return Vec::new();
    }
    if separator.is_empty() {
        // Degenerate separator: one element per character.
        return input.chars().map(String::from).collect();
    }
    input.split(separator).map(String::from).collect()
}

/// Escape-aware split.
///
/// The input is first split with [`str_split`]. Then every segment that ends
/// with a backslash is glued to the segment after it: the trailing backslash
/// is replaced by `separator` and the next segment is appended, so an escaped
/// separator becomes a literal one. The merged segment is checked again,
/// which lets several escaped separators in a row collapse into one element.
///
/// A dangling trailing backslash (no following segment) is preserved as-is.
pub fn str_split_escaped(input: &str, separator: &str) -> Vec<String> {
    let mut segments = str_split(input, separator).into_iter();
    let mut result = Vec::new();

    while let Some(mut segment) = segments.next() {
        // Keep absorbing following segments while the current one ends in an
        // escape. Only the segment being built is re-examined, so segments
        // already pushed to `result` are never emitted a second time.
        while segment.ends_with('\\') {
            let Some(next) = segments.next() else {
                break;
            };
            segment.pop();
            segment.push_str(separator);
            segment.push_str(&next);
        }
        result.push(segment);
    }

    result
}
251
252/// Convenience wrappers retained for compatibility naming.
253pub fn str_split_esc(input: &str, separator: &str) -> Vec<String> {
254    str_split_escaped(input, separator)
255}
256
257pub fn explode_esc(input: &str, separator: &str) -> Vec<String> {
258    str_split_escaped(input, separator)
259}
260pub fn explode_escaped(input: &str, separator: &str) -> Vec<String> {
261    str_split_escaped(input, separator)
262}
263
/// Joins `parts` with escaping: every occurrence of `separator` inside an
/// element is prefixed with a backslash, and `separator` is written after
/// each element, including the last one (so the result has a trailing
/// separator).
///
/// An empty `parts` slice produces an empty string.
pub fn str_join_escaped(parts: &[String], separator: &str) -> String {
    // The escaped form of the separator is the same for every element.
    let escaped_separator = format!("\\{separator}");
    parts.iter().fold(String::new(), |mut joined, part| {
        joined.push_str(&part.replace(separator, &escaped_separator));
        joined.push_str(separator);
        joined
    })
}
278
279/// Convenience wrapper.
280pub fn str_join_esc(parts: &[String], separator: &str) -> String {
281    str_join_escaped(parts, separator)
282}
283
284/// Join escaped but remove trailing separator.
285pub fn str_join_esc_no_trailing(parts: &[String], separator: &str) -> String {
286    let joined = str_join_escaped(parts, separator);
287    if joined.ends_with(separator) {
288        let cut = joined.len() - separator.len();
289        joined[..cut].to_string()
290    } else {
291        joined
292    }
293}
294
295/// Parse a space-delimited string of decimal integers (the inverse of `str_print_arr<int>`).
296/// Accepts optional trailing space. Rejects any non-digit / non-space characters.
297pub fn int_arr_from_str_printed_arr(s: &str) -> Result<Vec<u32>> {
298    let mut current = String::new();
299    let mut out = Vec::new();
300
301    for b in s.bytes() {
302        if b == b' ' {
303            if !current.is_empty() {
304                out.push(
305                    current
306                        .parse::<u32>()
307                        .map_err(|e| anyhow!("parse int: {e}"))?,
308                );
309                current.clear();
310            }
311        } else if ascii_is_digit(b) {
312            let char = stagel_char_from_byte(b)?;
313            current.push_str(&char.to_string());
314        } else {
315            return Err(anyhow!(
316                "Unexpected character (byte {b}) in int_arr_from_str_printed_arr"
317            ));
318        }
319    }
320    if !current.is_empty() {
321        out.push(
322            current
323                .parse::<u32>()
324                .map_err(|e| anyhow!("parse int: {e}"))?,
325        );
326    }
327    Ok(out)
328}
329
/// Converts a string to raw bytes, one byte per character.
///
/// Each character contributes the low 8 bits of its code point (mirroring JS
/// `charCodeAt & 0xFF`), so characters above U+00FF are truncated to their
/// low byte rather than rejected.
pub fn str_to_byte_array(s: &str) -> Vec<u8> {
    s.chars()
        .map(|c| {
            // The first little-endian byte is the least significant one.
            let [low, ..] = u32::from(c).to_le_bytes();
            low
        })
        .collect()
}
339
/// Builds a string from raw bytes, mapping each byte `XX` to the character
/// U+00XX.
pub fn str_from_byte_array(bytes: &[u8]) -> String {
    bytes.iter().copied().map(char::from).collect()
}
344
#[cfg(test)]
mod tests {
    use super::*;

    #[crate::ctb_test]
    fn test_substring_bug_compatible_behavior() {
        let s = "abcdef";
        assert_eq!(substring_bug_compatible(s, 0, 3), "abc");
        assert_eq!(substring_bug_compatible(s, 2, 2), "cd");
        // Negative lengths (len = 6): the count taken is len + 1 + length,
        // so -1 takes 6 characters (the whole string) and -2 takes 5.
        assert_eq!(substring_bug_compatible(s, 0, -1), "abcdef");
        assert_eq!(substring_bug_compatible(s, 0, -2), "abcde");
    }

    #[crate::ctb_test]
    fn test_char_mutation() {
        assert_eq!(set_char_at("hello", 1, "A"), "hAllo");
        assert_eq!(reverse_str("abc"), "cba");
    }

    #[crate::ctb_test]
    fn test_case_conversion() {
        assert_eq!(char_to_upper("a"), "A");
        assert_eq!(str_to_upper("Abc!"), "ABC!");
        assert_eq!(char_to_lower("Q"), "q");
        assert_eq!(str_to_lower("AbC"), "abc");
    }

    #[crate::ctb_test]
    fn test_str_contains_only_int() {
        assert!(str_contains_only_int("12345"));
        assert!(!str_contains_only_int("12a45"));
    }

    #[crate::ctb_test]
    fn test_format_percentage() {
        // 1/2, 1/3 and 2/3, each times 100 and rounded to three decimals.
        assert_eq!(format_percentage(1, 2).unwrap(), "50.000");
        assert_eq!(format_percentage(1, 3).unwrap(), "33.333");
        assert_eq!(format_percentage(2, 3).unwrap(), "66.667");
    }

    #[crate::ctb_test]
    fn test_str_split_basic() {
        // Single-character separator.
        assert_eq!(str_split("a,b,c", ","), ["a", "b", "c"]);
        assert_eq!(str_split("a,b,c,", ","), ["a", "b", "c", ""]);
        assert_eq!(str_split("", ","), Vec::<String>::new());

        // Multi-character separator, including leading, trailing and
        // back-to-back occurrences.
        assert_eq!(str_split("aabbabc", "ab"), ["a", "b", "c"]);
        assert_eq!(str_split("aabbabcab", "ab"), ["a", "b", "c", ""]);
        assert_eq!(str_split("abc", "ab"), ["", "c"]);
        assert_eq!(str_split("ababbaa", "ab"), ["", "", "baa"]);
        assert_eq!(str_split("aab", "ab"), ["a", ""]);
        assert_eq!(str_split("abaab", "ab"), ["", "a", ""]);
        assert_eq!(str_split("abaabab", "ab"), ["", "a", "", ""]);
        assert_eq!(str_split("abab", "ab"), ["", "", ""]);
        assert_eq!(str_split("ab", "ab"), ["", ""]);

        // Splitting, re-joining and splitting again gives the same pieces.
        let rejoined = str_split("abab", "ab").join("ab");
        assert_eq!(str_split(&rejoined, "ab"), ["", "", ""]);
    }

    #[crate::ctb_test]
    fn test_str_split_escaped() {
        // "a\,b" should decode to ["a,b"]
        assert_eq!(str_split_escaped(r"a\,b", ","), ["a,b"]);
        // Mixed escaped and plain separators
        assert_eq!(str_split_escaped(r"a\,b,c", ","), ["a,b", "c"]);
        // Escaped separator joining multiple times
        assert_eq!(str_split_escaped(r"a\,b\,c,d", ","), ["a,b,c", "d"]);
    }

    #[crate::ctb_test]
    fn test_str_join_escaped() {
        let parts = ["a,b".to_string(), "c".to_string()];

        // trailing comma per design
        assert_eq!(str_join_escaped(&parts, ","), r"a\,b,c,");
        assert_eq!(str_join_esc_no_trailing(&parts, ","), r"a\,b,c");
    }

    #[crate::ctb_test]
    fn test_int_arr_from_str_printed_arr() {
        assert_eq!(int_arr_from_str_printed_arr("1 2 3 ").unwrap(), [1, 2, 3]);
        assert_eq!(
            int_arr_from_str_printed_arr("10 20 30").unwrap(),
            [10, 20, 30]
        );

        // invalid char
        assert!(int_arr_from_str_printed_arr("1 a 3").is_err());
    }

    #[crate::ctb_test]
    fn test_str_to_from_byte_array_roundtrip() {
        let original = "ABC";
        let bytes = str_to_byte_array(original);
        assert_eq!(bytes, [65, 66, 67]);
        assert_eq!(str_from_byte_array(&bytes), original);
    }
}
ctoolbox/formats/eite/util/string.rs

ctoolbox/formats/eite/util/
string.rs