use crate::formats::eite::formats::utf8::UTF8FormatSettings;
use crate::formats::eite::formats::utf8::dca_to_utf8;
use crate::formats::utf_8e_128::decode_utf_8e_128;
use crate::formats::utf_8e_128::encode_utf_8e_128_buf;
use crate::utilities::*;
use std::collections::HashMap;
use std::iter;
use uuid::Uuid;

pub mod base16b;
pub mod base64;
pub mod eite;
pub mod html;
pub mod ip;
pub mod markdown;
pub mod multipart;
pub mod troff;
pub mod unicode;
pub mod utf8;
pub mod utf_8e_128;
pub mod wtf8;
/// Collects the error, warning, and debug messages produced while processing
/// a format.
///
/// Each severity is kept in its own vector; `log_order` records the order in
/// which entries were added across all three so they can be replayed in
/// sequence.
#[derive(Default, Debug)]
pub struct FormatLog {
    /// Messages recorded through `FormatLog::error`.
    errors: Vec<String>,
    /// Messages recorded through `FormatLog::warn`.
    warnings: Vec<String>,
    /// Messages recorded through `FormatLog::debug`.
    debug_messages: Vec<String>,
    /// Stores the order and type of all log entries, so that formatting can preserve log order and type.
    /// Each entry pairs a `LogType` with an index into the vector for that type.
    log_order: Vec<(LogType, usize)>,
}

/// Tracks the type of log entry.
///
/// Used as the tag in `FormatLog::log_order` to select which message vector
/// the accompanying index refers to.
#[derive(Copy, Clone, Debug)]
enum LogType {
    /// The entry lives in `FormatLog::errors`.
    Error,
    /// The entry lives in `FormatLog::warnings`.
    Warning,
    /// The entry lives in `FormatLog::debug_messages`.
    Debug,
}

impl FormatLog {
    /// Record a serious error that may indicate the document could not be fully processed.
    pub fn error(&mut self, message: &str) {
        #[cfg(debug_assertions)]
        crate::debug!("FormatLog error: {}", message);
        self.errors.push(message.to_string());
        self.log_order.push((LogType::Error, self.errors.len() - 1));
    }

    pub fn warn(&mut self, message: &str) {
        #[cfg(debug_assertions)]
        crate::debug!("FormatLog warn: {}", message);
        self.warnings.push(message.to_string());
        self.log_order
            .push((LogType::Warning, self.warnings.len() - 1));
    }

    #[cfg(not(debug_assertions))]
    pub fn debug(&mut self, message: &str) {}

    #[cfg(debug_assertions)]
    pub fn debug(&mut self, message: &str) {
        #[cfg(debug_assertions)]
        crate::debug!("FormatLog debug: {}", message);
        self.debug_messages.push(message.to_string());
        self.log_order
            .push((LogType::Debug, self.debug_messages.len() - 1));
    }

    pub fn get_errors(&self) -> Vec<String> {
        self.errors.clone()
    }

    pub fn get_warnings(&self) -> Vec<String> {
        self.warnings.clone()
    }

    pub fn has_errors(&self) -> bool {
        !self.errors.is_empty()
    }

    pub fn has_no_errors(&self) -> bool {
        !self.has_errors()
    }

    pub fn has_warnings(&self) -> bool {
        !self.warnings.is_empty()
    }

    pub fn has_debug_messages(&self) -> bool {
        !self.debug_messages.is_empty()
    }

    pub fn has_any(&self) -> bool {
        self.has_errors() || self.has_warnings() || self.has_debug_messages()
    }

    pub fn has_warnings_or_errors(&self) -> bool {
        self.has_warnings() || self.has_errors()
    }

    pub fn has_no_warnings_or_errors(&self) -> bool {
        !self.has_warnings_or_errors()
    }

    /// Add an import error at a specific character index and problem description.
    pub fn import_error(&mut self, index: u64, problem: &str) {
        let error = format!(
            "An unrecoverable problem was encountered while importing at character {index}: {problem}"
        );
        self.error(error.as_str());
    }

    /// Add an import warning for a specific character index and problem description.
    pub fn import_warning(&mut self, index: u64, problem: &str) {
        let warn = format!(
            "A problem was encountered while importing at character {index}: {problem}"
        );
        self.warn(warn.as_str());
    }

    /// Add an export error at a specific character index and problem description.
    pub fn export_error(&mut self, index: u64, problem: &str) {
        let error = format!(
            "An unrecoverable problem was encountered while exporting at character {index}: {problem}"
        );
        self.error(error.as_str());
    }

    /// Add an export warning for a specific character index and problem description.
    pub fn export_warning(&mut self, index: u64, problem: &str) {
        let warn = format!(
            "A problem was encountered while exporting at character {index}: {problem}"
        );
        self.warn(warn.as_str());
    }

    pub fn export_warning_unmappable(
        &mut self,
        index: u64,
        problem_dc: u32,
        format: &str,
    ) {
        self.export_warning(index, format!("The character {problem_dc} could not be represented in the chosen export format ({format}).").as_str());
    }

    pub fn merge(&mut self, other: &FormatLog) {
        let error_offset = self.errors.len();
        let warning_offset = self.warnings.len();
        let debug_offset = self.debug_messages.len();

        self.errors.extend(other.errors.clone());
        self.warnings.extend(other.warnings.clone());
        self.debug_messages.extend(other.debug_messages.clone());

        for &(typ, idx) in &other.log_order {
            let adjusted_idx = match typ {
                LogType::Error => idx + error_offset,
                LogType::Warning => idx + warning_offset,
                LogType::Debug => idx + debug_offset,
            };
            self.log_order.push((typ, adjusted_idx));
        }
    }

    /// Formats all log messages in the order they were logged, with proper prefixing.
    pub fn format_all(&self) -> String {
        if !self.has_any() {
            return String::new();
        }
        let mut output = String::new();
        output.push_str("Messages during format processing:\n");
        // idx are not consecutive if printed, this uses them to encode the sort
        // order, I think. They're consecutive w/i each message type.
        for &(typ, idx) in &self.log_order {
            match typ {
                LogType::Error => {
                    output.push_str("* [ERROR] ");
                    output.push_str(&self.errors[idx]);
                }
                LogType::Warning => {
                    output.push_str("- [WARNING] ");
                    output.push_str(&self.warnings[idx]);
                }
                LogType::Debug => {
                    output.push_str("- [DEBUG] ");
                    output.push_str(&self.debug_messages[idx]);
                }
            }
            output.push('\n');
        }
        output
    }

    pub fn format_errors(&self) -> String {
        let mut errors = String::new();
        if self.has_errors() {
            for e in &self.errors {
                errors.push_str("- ");
                errors.push_str(e);
                errors.push('\n');
            }
            format!("Errors during format processing:\n{errors}")
        } else {
            String::new()
        }
    }

    pub fn format_warnings(&self) -> String {
        let mut warnings = String::new();
        if self.has_warnings() {
            for w in &self.warnings {
                warnings.push_str("- ");
                warnings.push_str(w);
                warnings.push('\n');
            }
            format!("Warnings during format processing:\n{warnings}")
        } else {
            String::new()
        }
    }

    pub fn format_debug(&self) -> String {
        let mut debug = String::new();
        if self.debug_messages.is_empty() {
            String::new()
        } else {
            for d in &self.debug_messages {
                debug.push_str("- ");
                debug.push_str(d);
                debug.push('\n');
            }
            format!("Debug messages during format processing:\n{debug}")
        }
    }

    pub fn auto_log(&self) {
        if self.has_any() {
            error!(self.format_errors());
            warn!(self.format_warnings());
            debug!(self.format_debug());
        } else {
            info!("No errors or warnings during format processing.");
        }
    }
}

impl std::fmt::Display for FormatLog {
    /// Writes the full ordered log followed by a newline, or a fixed
    /// "nothing to report" line when the log is empty.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let body = if self.has_any() {
            self.format_all()
        } else {
            String::from("No errors or warnings during format processing.")
        };
        writeln!(f, "{}", body)
    }
}

/// Converts the `String` half of a successful `(String, FormatLog)` result
/// into its UTF-8 bytes, leaving the log and any error untouched.
pub fn string_result_with_log_to_vec(
    result: Result<(String, FormatLog)>,
) -> Result<(Vec<u8>, FormatLog)> {
    result.map(|(text, log)| (text.into_bytes(), log))
}

/// Returns the table of known format UUIDs.
///
/// Keys are hyphenated UUID strings as bytes; values are the matching format
/// names as bytes. The table currently has a single entry, for `html`.
pub fn get_format_uuids() -> HashMap<Vec<u8>, Vec<u8>> {
    HashMap::from([(
        strtovec("9ba60c52-9cf8-41a7-b3ea-7a1e14f6c5d7"),
        strtovec("html"),
    )])
}

/// Returns the format name registered for the UUID found at the start of
/// `document`, or `None` if no known UUID is present.
///
/// Only the first 36 bytes (the length of a hyphenated UUID string) are
/// handed to `get_uuid_from_document`.
pub fn get_format_from_uuid(mut document: Vec<u8>) -> Option<Vec<u8>> {
    // Shorten in place rather than copying the head into a new vector.
    document.truncate(36);
    let uuid = get_uuid_from_document(document)?;
    get_format_uuids().get(&uuid).cloned()
}

/// Looks for a known format UUID at the start of `document`.
///
/// Two encodings are tried, in this order:
/// 1. the first 16 bytes read as a binary UUID;
/// 2. the first 36 bytes read as hyphenated UUID text.
///
/// Returns the hyphenated UUID as bytes (a key of `get_format_uuids`) when
/// either form is a known format, and `None` otherwise. Documents shorter
/// than 16 bytes always give `None`; documents shorter than 36 bytes are
/// only checked against the binary form.
pub fn get_uuid_from_document(document: Vec<u8>) -> Option<Vec<u8>> {
    let binary_prefix = document.get(..16)?;
    let formats = get_format_uuids();

    let uuid_binary = Uuid::from_slice(binary_prefix)
        .expect("The length should be 16")
        .hyphenated()
        .to_string()
        .into_bytes();
    if formats.contains_key(&uuid_binary) {
        return Some(uuid_binary);
    }

    // A document of 16..=35 bytes cannot hold the textual form. `get` returns
    // `None` there instead of panicking on an out-of-range slice.
    let text_prefix = document.get(..36)?;
    let uuid_string = String::from_utf8_lossy(text_prefix)
        .into_owned()
        .into_bytes();
    if formats.contains_key(&uuid_string) {
        Some(uuid_string)
    } else {
        None
    }
}

/// Placeholder for format conversion: not implemented yet, so `document` is
/// currently returned unchanged.
pub fn convert_if_needed(document: Vec<u8>) -> Vec<u8> {
    // TODO

    document
}

/// Placeholder for converting `document` from the given `filetype`: not
/// implemented yet, so `filetype` is ignored and `document` is currently
/// returned unchanged.
pub fn convert_from(document: Vec<u8>, filetype: Vec<u8>) -> Vec<u8> {
    // TODO

    document
}

/// Sanitizes an HTML document with `ammonia`.
///
/// The input is decoded lossily as UTF-8, cleaned with a default
/// `ammonia::Builder` that additionally has `class` and `id` registered as
/// generic attributes, and returned as bytes.
pub fn sanitize_html(document: Vec<u8>) -> Vec<u8> {
    let html = String::from_utf8_lossy(&document);

    let extra_attributes = iter::once("class").chain(iter::once("id"));
    let mut cleaner = ammonia::Builder::default();
    cleaner.add_generic_attributes(extra_attributes);

    let cleaned = cleaner.clean(&html);
    cleaned.to_string().into_bytes()
}

// Converts dctext to dcutf
pub fn dctext_to_dcutf(document: Vec<u8>) -> Vec<u8> {
    // Format looks like (w/o backticks): `Unicode (UTF-8) text @123@miesu@214748364@@L662@`
    // where `Unicode text` is actual unicode text, and between each pair of @ signs, is a DcId. A DcId can be any int 128 bits (u128) in decimal, and it may have an `L` prefix.
    // Output format is sort of UTF-8 text. For normal Unicode input characters, the output character is the same. For DcIds less than or equal to 1114111 (the largest Unicode character, I believe), the output character is the corresponding "generalized UTF-8", the numeric value encoded in the same underlying algorithm as UTF-8. For DcIds greater than 1114111 and not prefixed with an L, the output character is the decimal DcId represented by extending the usual algorithm of UTF-8 encoding, but for those larger numbers. For DcIds prefixed with an L, the output is equivalent to @1114408@ followed by the Dc that followed the L (the L is just a shorthand for that 1114408 Dc). That is to say, it's not a true Unicode encoding, it's simply using an extension of the algorithm underlying UTF-8 as a convenient encoding of ints.
    let document = String::from_utf8(document).unwrap();
    let mut output = Vec::new();
    for line in document.lines() {
        let mut rest = line;
        while let Some(start) = rest.find('@') {
            // Output text before @ as plain UTF-8
            output.extend_from_slice(&rest.as_bytes()[..start]);
            rest = &rest[start + 1..];
            if let Some(end) = rest.find('@') {
                let token = &rest[..end];
                let mut dcid_str = token;
                let mut l_prefix = false;
                if dcid_str.is_empty() {
                    dcid_str = "64"; // @ sign
                }
                if dcid_str.starts_with('L') {
                    l_prefix = true;
                    dcid_str = &dcid_str[1..];
                }
                if let Ok(dcid) = dcid_str.parse::<u128>() {
                    let mut buf = [0u8; 24];
                    if l_prefix {
                        // Output as UTF-8 codepoint 1114408, then the dcid
                        let n1 = encode_utf_8e_128_buf(&mut buf, 1114408);
                        output.extend_from_slice(&buf[..n1]);
                        let n2 = encode_utf_8e_128_buf(&mut buf, dcid);
                        output.extend_from_slice(&buf[..n2]);
                    } else {
                        let n = encode_utf_8e_128_buf(&mut buf, dcid);
                        output.extend_from_slice(&buf[..n]);
                    }
                }
                rest = &rest[end + 1..];
            } else {
                // No matching @, output the rest and break
                output.extend_from_slice(rest.as_bytes());
                break;
            }
        }
        // Output any remaining text
        output.extend_from_slice(rest.as_bytes());
    }

    output
}

/// Converts dcutf to dctext.
///
/// Each value decoded by `decode_utf_8e_128` is written as follows:
/// * 64 (`@`) becomes the escape `@@`;
/// * 1114408 followed by another decodable Dc `N` becomes `@LN@`; when
///   nothing decodable follows, it becomes `@1114408@` and conversion resumes
///   at the byte right after it;
/// * a Unicode scalar value becomes that character;
/// * anything else (surrogates, values past the Unicode range) becomes `@N@`.
///
/// A byte at which decoding fails is pushed as the `char` with the same
/// numeric value, so bytes of 0x80 and above come out as two-byte UTF-8
/// sequences rather than the raw byte.
pub fn dcutf_to_dctext(document: Vec<u8>) -> Vec<u8> {
    let mut output = String::new();
    let mut i = 0;
    while i < document.len() {
        let (codepoint, size) = match decode_utf_8e_128(&document[i..]) {
            Some(decoded) => decoded,
            None => {
                // Undecodable byte: keep its value as a single character.
                output.push(char::from(document[i]));
                i += 1;
                continue;
            }
        };
        // Advance exactly once per decoded value. Doing it here, before the
        // branches, keeps the 1114408 fallback from skipping extra bytes.
        i += size;

        if codepoint == 64 {
            output.push_str("@@");
        } else if codepoint == 1114408 {
            // L-prefixed DcId follows
            match decode_utf_8e_128(&document[i..]) {
                Some((next_dc, next_size)) => {
                    output.push_str(&format!("@L{next_dc}@"));
                    i += next_size;
                }
                None => output.push_str("@1114408@"),
            }
        } else {
            // `from_u32` rejects surrogates and values above 0x10FFFF; those,
            // and values too large for `u32`, are written as generalized DcIds.
            let scalar =
                u32::try_from(codepoint).ok().and_then(std::char::from_u32);
            match scalar {
                Some(ch) => output.push(ch),
                None => output.push_str(&format!("@{codepoint}@")),
            }
        }
    }
    output.into_bytes()
}

/// Extension trait that gives `char` an `as_utf8_bytes()` convenience method.
pub trait CharUtfBytesExt {
    /// Returns the UTF-8 encoding of `self` as an owned byte vector.
    fn as_utf8_bytes(&self) -> Vec<u8>;
}

impl CharUtfBytesExt for char {
    /// Like `encode_utf8`, but returns an owned `Vec<u8>` instead of writing
    /// into a caller-supplied buffer: more convenient, at the cost of an
    /// allocation.
    fn as_utf8_bytes(&self) -> Vec<u8> {
        let mut bytes = vec![0u8; self.len_utf8()];
        self.encode_utf8(&mut bytes);
        bytes
    }
}

// Test helpers
/// Unwraps `actual`, asserts that its log holds no warnings or errors and
/// that its bytes equal `expected`, then returns the bytes.
///
/// Panics if `actual` is an `Err` or if either assertion fails.
pub fn assert_vec_u8_ok_eq_no_warnings(
    expected: &[u8],
    actual: Result<(Vec<u8>, FormatLog)>,
) -> Vec<u8> {
    let (bytes, log) = actual.unwrap();
    let log_is_clean = !log.has_warnings_or_errors();
    assert!(
        log_is_clean,
        "Warnings or errors found:\n{}",
        log.format_all()
    );
    assert_vec_u8_eq_log(expected, &bytes, &log);
    bytes
}

/// Unwraps `actual`, asserts that its log holds no errors (warnings are
/// tolerated) and that its bytes equal `expected`, then returns both.
///
/// Panics if `actual` is an `Err` or if either assertion fails.
pub fn assert_vec_u8_ok_eq_no_errors(
    expected: &[u8],
    actual: Result<(Vec<u8>, FormatLog)>,
) -> (Vec<u8>, FormatLog) {
    let (bytes, log) = actual.unwrap();
    let log_is_error_free = !log.has_errors();
    assert!(log_is_error_free, "Errors found:\n{}", log.format_all());
    assert_vec_u8_eq_log(expected, &bytes, &log);
    (bytes, log)
}

/// Renders both Dc arrays through `dca_to_utf8` with default settings, for
/// inclusion in an assertion failure message.
///
/// An array whose conversion fails is rendered from the default (empty)
/// value.
fn _format_dcs_for_log(expected: &[u32], actual: &[u32]) -> String {
    let render = |dcs: &[u32]| {
        let (utf8, _) = dca_to_utf8(dcs, &UTF8FormatSettings::default())
            .unwrap_or_default();
        String::from_utf8_lossy(&utf8).into_owned()
    };
    let expected_text = render(expected);
    let actual_text = render(actual);

    format!(
        "Expected formatted Dcs: {}\nActual formatted Dcs: {}",
        expected_text, actual_text
    )
}

/// Shared implementation of the `assert_vec_{u32,dc}_ok_eq_no_{warnings,errors}`
/// helpers.
///
/// Unwraps `actual`, checks the log, compares the values with `expected`, and
/// returns the unwrapped pair.
///
/// * `print_dcs` - also render both arrays as formatted Dcs in failure
///   messages.
/// * `disallow_warnings` - fail on warnings as well as on errors.
fn _assert_vec_u32_ok_eq_log(
    expected: &[u32],
    actual: Result<(Vec<u32>, FormatLog)>,
    print_dcs: bool,
    disallow_warnings: bool,
) -> (Vec<u32>, FormatLog) {
    let (values, log) = actual.unwrap();

    let heading = if disallow_warnings {
        "Warnings or errors"
    } else {
        "Errors"
    };
    let mut message = format!("{heading} found:\n{}", log.format_all());
    if print_dcs {
        message.push_str(&_format_dcs_for_log(expected, &values));
    }

    let log_is_acceptable = if disallow_warnings {
        log.has_no_warnings_or_errors()
    } else {
        log.has_no_errors()
    };
    assert!(log_is_acceptable, "{message}");

    if print_dcs {
        assert_vec_dc_eq_log(expected, &values, &log);
    } else {
        assert_vec_u32_eq_log(expected, &values, &log);
    }
    (values, log)
}

/// Unwraps `actual` and asserts that it equals `expected` and that its log
/// holds no warnings or errors.
///
/// Calls `_assert_vec_u32_ok_eq_log` with `print_dcs = false` and
/// `disallow_warnings = true`.
pub fn assert_vec_u32_ok_eq_no_warnings(
    expected: &[u32],
    actual: Result<(Vec<u32>, FormatLog)>,
) -> (Vec<u32>, FormatLog) {
    _assert_vec_u32_ok_eq_log(expected, actual, false, true)
}

/// Unwraps `actual` and asserts that it equals `expected` and that its log
/// holds no warnings or errors; failure messages also show both arrays as
/// formatted Dcs.
///
/// Calls `_assert_vec_u32_ok_eq_log` with `print_dcs = true` and
/// `disallow_warnings = true`.
pub fn assert_vec_dc_ok_eq_no_warnings(
    expected: &[u32],
    actual: Result<(Vec<u32>, FormatLog)>,
) -> (Vec<u32>, FormatLog) {
    _assert_vec_u32_ok_eq_log(expected, actual, true, true)
}

/// Unwraps `actual` and asserts that it equals `expected` and that its log
/// holds no errors (warnings are tolerated).
///
/// Calls `_assert_vec_u32_ok_eq_log` with `print_dcs = false` and
/// `disallow_warnings = false`.
pub fn assert_vec_u32_ok_eq_no_errors(
    expected: &[u32],
    actual: Result<(Vec<u32>, FormatLog)>,
) -> (Vec<u32>, FormatLog) {
    _assert_vec_u32_ok_eq_log(expected, actual, false, false)
}

/// Unwraps `actual` and asserts that it equals `expected` and that its log
/// holds no errors (warnings are tolerated); failure messages also show both
/// arrays as formatted Dcs.
///
/// Calls `_assert_vec_u32_ok_eq_log` with `print_dcs = true` and
/// `disallow_warnings = false`.
pub fn assert_vec_dc_ok_eq_no_errors(
    expected: &[u32],
    actual: Result<(Vec<u32>, FormatLog)>,
) -> (Vec<u32>, FormatLog) {
    _assert_vec_u32_ok_eq_log(expected, actual, true, false)
}

/// Equivalent to `assert_vec_u32_eq`, but prints the provided log on failure
///
/// Calls `_assert_vec_u32_eq_log` with `print_dcs = false`.
pub fn assert_vec_u32_eq_log(
    expected: &[u32],
    actual: &[u32],
    log: &FormatLog,
) {
    _assert_vec_u32_eq_log(expected, actual, log, false);
}

/// Asserts that two Dc arrays are equal; on failure the message includes the
/// provided log and both arrays rendered as formatted Dcs.
///
/// Calls `_assert_vec_u32_eq_log` with `print_dcs = true`.
pub fn assert_vec_dc_eq_log(expected: &[u32], actual: &[u32], log: &FormatLog) {
    _assert_vec_u32_eq_log(expected, actual, log, true);
}

/// Asserts `expected == actual`, with a failure message that contains the
/// mismatch report from `fmt_mismatch_vec_u32`, the formatted `log`, and,
/// when `print_dcs` is set, both arrays rendered as formatted Dcs.
fn _assert_vec_u32_eq_log(
    expected: &[u32],
    actual: &[u32],
    log: &FormatLog,
    print_dcs: bool,
) {
    let mismatch = fmt_mismatch_vec_u32(expected, actual);
    let rendered_log = log.format_all();
    let mut message = format!(
        "Vectors (u32) differ.\n{}\nLog:      {}",
        mismatch, rendered_log
    );
    if print_dcs {
        let dcs = _format_dcs_for_log(expected, actual);
        message.push_str(&dcs);
    }

    assert_eq!(expected, actual, "{message}");
}

/// Equivalent to `assert_vec_u8_eq`, but prints the provided log on failure
///
/// The failure message contains the mismatch report from
/// `fmt_mismatch_vec_u8` followed by `log.format_all()`.
pub fn assert_vec_u8_eq_log(expected: &[u8], actual: &[u8], log: &FormatLog) {
    assert_eq!(
        expected,
        actual,
        "Vectors (u8) differ.\n{}\nLog:      {}",
        fmt_mismatch_vec_u8(expected, actual),
        log.format_all()
    );
}

/// Asserts that `actual` equals `expected` and hands `actual` back.
///
/// On failure the message contains the mismatch report from
/// `fmt_mismatch_string`.
pub fn assert_string_eq(expected: &str, actual: String) -> String {
    assert_eq!(
        expected,
        actual.as_str(),
        "Strings differ.\n{}",
        fmt_mismatch_string(expected, &actual),
    );
    actual
}

pub fn assert_string_ok_eq(expected: &str, actual: Result<String>) -> String {
    let actual_string = actual.unwrap();

    assert_eq!(
        expected,
        &actual_string,
        "Strings differ.\n{}",
        fmt_mismatch_string(expected, &actual_string),
    );
    actual_string
}

/// Unwraps `actual`, asserts that its log holds no warnings or errors and
/// that its string equals `expected`, then returns both.
///
/// Panics if `actual` is an `Err` or if either assertion fails.
pub fn assert_string_ok_eq_no_warnings(
    expected: &str,
    actual: Result<(String, FormatLog)>,
) -> (String, FormatLog) {
    let (text, log) = actual.unwrap();

    let log_is_clean = !log.has_warnings_or_errors();
    assert!(
        log_is_clean,
        "Warnings or errors found:\n{}",
        log.format_all()
    );
    assert_eq!(
        expected,
        text.as_str(),
        "Strings differ.\n{}\nLog:      {}",
        fmt_mismatch_string(expected, &text),
        log.format_all()
    );
    (text, log)
}

/// Unwraps `actual`, asserts that its log holds no errors (warnings are
/// tolerated) and that its string equals `expected`, then returns both.
///
/// Panics if `actual` is an `Err` or if either assertion fails.
pub fn assert_string_ok_eq_no_errors(
    expected: &str,
    actual: Result<(String, FormatLog)>,
) -> (String, FormatLog) {
    let (text, log) = actual.unwrap();

    let log_is_error_free = !log.has_errors();
    assert!(log_is_error_free, "Errors found:\n{}", log.format_all());
    assert_eq!(
        expected,
        text.as_str(),
        "Strings differ.\n{}\nLog:      {}",
        fmt_mismatch_string(expected, &text),
        log.format_all()
    );
    (text, log)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Converts a dctext sample to dcutf, checks the exact bytes, then
    /// converts back and checks the normalized dctext.
    #[crate::ctb_test]
    fn test_dctext_to_dcutf() {
        let text = "hi @64@ @@ @65@ @128@ there 🥴 @L42@ noncharacter @1114111@ surrogate @56191@ unicode null @0@ dc null @1114112@ @2147483648@ 2^128-1 @340282366920938463463374607431768211455@";
        let dcutf = dctext_to_dcutf(text.as_bytes().to_vec());
        assert_eq!(
            "686920402040204120c28020746865726520f09fa5b420ff84849084a82a206e6f6e63686172616374657220f48fbfbf20737572726f6761746520edadbf20756e69636f6465206e756c6c2000206463206e756c6c20ff848490808020ff8682808080808020325e3132382d3120ff9683bfbfbfbfbfbfbfbfbfbfbfbfbfbfbfbfbfbfbfbfbf",
            vectohex(&dcutf)
        );

        let roundtrip = dcutf_to_dctext(dcutf);
        let roundtrip_str = String::from_utf8(roundtrip).unwrap();

        // The round trip is normalized, not byte-identical to the input:
        // DcIds that are Unicode scalar values come back as plain characters
        // and `@64@` comes back as `@@`.
        let expected_roundtrip = "hi @@ @@ A \u{80} there 🥴 @L42@ noncharacter \u{10FFFF} surrogate @56191@ unicode null \u{0} dc null @1114112@ @2147483648@ 2^128-1 @340282366920938463463374607431768211455@";
        // `assert_eq!` prints both strings when they differ.
        assert_eq!(expected_roundtrip, roundtrip_str);
    }

    /// A document starting with a known textual UUID yields that UUID.
    #[crate::ctb_test]
    fn can_get_uuid_from_document() {
        assert_eq!(
            strtovec("9ba60c52-9cf8-41a7-b3ea-7a1e14f6c5d7"),
            get_uuid_from_document(strtovec(
                "9ba60c52-9cf8-41a7-b3ea-7a1e14f6c5d7<html>"
            ))
            .unwrap()
        );
    }

    /// A document starting with the HTML format UUID is identified as `html`.
    #[crate::ctb_test]
    fn can_get_format_from_uuid() {
        assert_eq!(
            strtovec("html"),
            get_format_from_uuid(strtovec(
                "9ba60c52-9cf8-41a7-b3ea-7a1e14f6c5d7<html>"
            ))
            .unwrap()
        );
    }
}
