1use std::env;
35use std::fs;
36use std::io::{self, Write};
37use std::path::Path;
38
39use anyhow::Result;
40use clap::crate_name;
41use clap::crate_version;
42use num_enum::IntoPrimitive;
43
44use crate::formats::FormatLog;
45
/// Text style currently applied to the HTML output.
///
/// `push_font` maps each variant to the inline tag it opens and closes.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum Font {
    /// Plain text; no inline tag is open.
    Regular,
    /// Rendered inside `<strong>`.
    Bold,
    /// Rendered inside `<em>`.
    Italic,
    /// Rendered inside `<small>`.
    Small,
    /// Rendered inside `<small style="font-weight: bold;">`.
    SmallBold,
    /// Rendered inside `<pre>`.
    Monospace,
}
59
/// Heading depth of a man-page element.
///
/// The `u8` representation (0, 1, 2) is converted by `html_heading` into the
/// numeric level of the emitted `<hN>` tag.
#[derive(Copy, Clone, Debug, PartialEq, Eq, IntoPrimitive)]
#[repr(u8)]
enum Heading {
    /// Page topic, produced by `.TH`.
    Topic,
    /// Section heading, produced by `.SH`.
    Section,
    /// Subsection heading, produced by `.SS`.
    Subsection,
}
67
/// Mutable conversion state shared by all output helpers.
struct State {
    // True between `html_header` and `html_footer`.
    wrote_header: bool,
    // Name of the currently open block element ("p", "pre", "ul"), if any.
    in_block: Option<&'static str>,
    // True while an `<a>` opened by `.UR`/`.MT` is still open.
    in_link: bool,
    // Number of currently open indentation `<div>`s (`.RS` / `.in`).
    indent: usize,
    // Optional document metadata supplied on the command line.
    author: Option<String>,
    chapter: Option<String>,
    copyright: Option<String>,
    css: Option<String>,
    subject: Option<String>,
    title: Option<String>,

    // Anchor of the most recent topic heading; prefix of section ids.
    atopic: String,
    // Anchor of the most recent section heading; prefix of subsection ids.
    asection: String,
    // Font whose inline tag is currently open.
    font: Font,
}
84
85impl State {
86 fn new() -> Self {
87 State {
88 wrote_header: false,
89 in_block: None,
90 in_link: false,
91 indent: 0,
92 author: None,
93 chapter: None,
94 copyright: None,
95 css: None,
96 subject: None,
97 title: None,
98 atopic: String::new(),
99 asection: String::new(),
100 font: Font::Regular,
101 }
102 }
103}
104
105fn main() {
110 let mut args: Vec<String> = env::args().collect();
111 if args.is_empty() {
112 args.push("mantohtml".into());
113 }
114
115 let mut state = State::new();
116
117 let mut i = 1usize;
118 let mut end_of_options = false;
119
120 if args.len() == 1 {
121 usage(None);
122 std::process::exit(1);
123 }
124
125 while i < args.len() {
126 let arg = &args[i];
127 if !end_of_options && arg == "--author" {
128 i += 1;
129 if i >= args.len() {
130 eprintln!("mantohtml: Missing author after --author.");
131 std::process::exit(1);
132 }
133 state.author = Some(args[i].clone());
134 } else if !end_of_options && arg == "--chapter" {
135 i += 1;
136 if i >= args.len() {
137 eprintln!("mantohtml: Missing chapter after --chapter.");
138 std::process::exit(1);
139 }
140 state.chapter = Some(args[i].clone());
141 } else if !end_of_options && arg == "--copyright" {
142 i += 1;
143 if i >= args.len() {
144 eprintln!("mantohtml: Missing copyright after --copyright.");
145 std::process::exit(1);
146 }
147 state.copyright = Some(args[i].clone());
148 } else if !end_of_options && arg == "--css" {
149 i += 1;
150 if i >= args.len() {
151 eprintln!(
152 "mantohtml: Missing CSS filename or URL after --css."
153 );
154 std::process::exit(1);
155 }
156 state.css = Some(args[i].clone());
157 } else if !end_of_options && arg == "--help" {
158 usage(None);
159 return;
160 } else if !end_of_options && arg == "--subject" {
161 i += 1;
162 if i >= args.len() {
163 eprintln!("mantohtml: Missing subject after --subject.");
164 std::process::exit(1);
165 }
166 state.subject = Some(args[i].clone());
167 } else if !end_of_options && arg == "--title" {
168 i += 1;
169 if i >= args.len() {
170 eprintln!("mantohtml: Missing title after --title.");
171 std::process::exit(1);
172 }
173 state.title = Some(args[i].clone());
174 } else if !end_of_options && arg == "--version" {
175 println!("{}", crate_version!());
176 return;
177 } else if !end_of_options && arg == "--" {
178 end_of_options = true;
179 } else if !end_of_options && arg.starts_with('-') {
180 usage(Some(arg));
181 std::process::exit(1);
182 } else {
183 if let Err(err) = convert_man_to_stdout(&mut state, arg) {
185 eprintln!("{err}");
186 }
187 }
188 i += 1;
189 }
190
191 if !state.wrote_header {
192 usage(None);
193 std::process::exit(1);
194 }
195}
196
197fn convert_man_to_stdout(
202 state: &mut State,
203 filename: &str,
204) -> Result<(), String> {
205 let data = fs::read(filename).map_err(|e| format!("{filename}: {e}"))?;
206
207 let basepath;
208 if let Some(parent) = Path::new(filename).parent() {
209 basepath = parent.to_string_lossy().to_string();
210 } else {
211 basepath = ".".into();
212 }
213
214 convert_man_from_data(
215 state,
216 data,
217 Some(filename),
218 Some(basepath.as_str()),
219 io::stdout(),
220 )
221}
222
223pub fn convert_man_troff_to_html(
224 data: Vec<u8>,
225) -> Result<(Vec<u8>, FormatLog)> {
226 let mut state = State::new();
227 let mut buffer: Vec<u8> = Vec::new();
228 let mut log: FormatLog = FormatLog::default();
229 let result =
230 convert_man_from_data(&mut state, data, None, None, &mut buffer);
231
232 if let Err(err) = result {
233 log.error(&err);
234 return Ok((buffer, log));
235 }
236
237 Ok((buffer, log))
238}
239
/// Converts man-page source in `data` to HTML written to `out`.
///
/// Input is read line by line through `ManReader`. Lines starting with `.`
/// are dispatched on their macro name; other lines are emitted as text.
/// Nothing is written until a `.TH` macro has been seen.
///
/// * `filename` - name used in diagnostics; `"document"` when `None`.
/// * `basepath` - forwarded to `handle_xx` for cross-reference link probing.
/// * `out` - destination; wrapped in a `BufWriter` and flushed on return.
///
/// # Errors
///
/// Returns a message when `.TH` lacks a title or a section starting with a
/// digit. Other problems are reported on stderr and conversion continues.
/// Write errors on `out` are ignored.
fn convert_man_from_data<W: Write>(
    state: &mut State,
    data: Vec<u8>,
    filename: Option<&str>,
    basepath: Option<&str>,
    out: W,
) -> Result<(), String> {
    let mut reader = ManReader::new(&data);

    // Whether a `.TH` has been processed yet.
    let mut th_seen = false;
    // Ensures the "before .TH" diagnostic is printed at most once.
    let mut warning = false;
    // Text appended after the next emitted line (set to "<br>" by `.TP`).
    let mut break_text = String::new();

    let mut stdout = io::BufWriter::new(out);

    let filename = filename.unwrap_or("document");

    while let Some(line) = reader.man_gets() {
        if line.starts_with('.') {
            // `lp` is advanced past each value parsed from the macro line.
            let mut lp: &str = &line;
            let macro_name = parse_value(&mut lp).unwrap_or_default();

            if macro_name == "." {
                // A lone "." does nothing.
                continue;
            } else if macro_name == ".TH" {
                // Title header: ".TH title section ...".
                let title_val = parse_value(&mut lp).unwrap_or_default();
                if title_val.is_empty() {
                    return Err(format!(
                        "mantohtml: Missing title in '.TH' on line {} of '{}'.",
                        reader.line_number(),
                        filename
                    ));
                }
                let section = parse_value(&mut lp).unwrap_or_default();
                if section.is_empty()
                    || !section.chars().next().unwrap_or(' ').is_ascii_digit()
                {
                    return Err(format!(
                        "mantohtml: Missing section in '.TH' on line {} of '{}'.",
                        reader.line_number(),
                        filename
                    ));
                }
                let topic = format!("{title_val}({section})");

                if state.wrote_header {
                    // A later `.TH`: close whatever is still open.
                    if state.in_link {
                        writeln!(stdout, "</a>").ok();
                        state.in_link = false;
                    }
                    if let Some(block) = state.in_block.take() {
                        writeln!(stdout, "</{block}>").ok();
                    }
                } else {
                    html_header(state, &topic, &mut stdout);
                }

                html_heading(state, Heading::Topic, &topic, &mut stdout);
                th_seen = true;
            } else if !th_seen {
                // Every other macro is skipped until `.TH` has been seen.
                if !warning {
                    eprintln!(
                        "mantohtml: Need '.TH' before '{}' macro on line {} of '{}'.",
                        macro_name,
                        reader.line_number(),
                        filename
                    );
                    warning = true;
                }
                continue;
            } else if macro_name == ".B" {
                // Bold text; with no argument the next input line is used.
                let mut remainder = lp.to_owned();
                if remainder.trim().is_empty() {
                    if let Some(next_line) = reader.man_gets() {
                        remainder = next_line;
                    }
                }
                push_font(state, Font::Bold, &mut stdout);
                man_puts(state, &remainder, &mut stdout);
                pop_font(state, &mut stdout);
                writeln!(stdout, "{break_text}").ok();
                break_text.clear();
            } else if macro_name == ".BI" {
                // Alternating bold / italic words.
                handle_xx(
                    state,
                    Font::Bold,
                    Font::Italic,
                    &mut reader,
                    lp,
                    &mut break_text,
                    &mut stdout,
                    basepath,
                );
            } else if macro_name == ".BR" {
                // Alternating bold / regular words.
                handle_xx(
                    state,
                    Font::Bold,
                    Font::Regular,
                    &mut reader,
                    lp,
                    &mut break_text,
                    &mut stdout,
                    basepath,
                );
            } else if macro_name == ".EE" || macro_name == ".fi" {
                // End of preformatted text; only valid inside a `pre` block.
                if state.in_block == Some("pre") {
                    writeln!(stdout, "</pre>").ok();
                    state.in_block = None;
                } else {
                    eprintln!(
                        "mantohtml: '{}' with no '.EX' or '.nf' on line {} of '{}'.",
                        macro_name,
                        reader.line_number(),
                        filename
                    );
                }
            } else if macro_name == ".EX" || macro_name == ".nf" {
                // Start of preformatted text.
                close_link_if(state, &mut stdout);
                close_block_if(state, &mut stdout);
                write!(stdout, " <pre>").ok();
                state.in_block = Some("pre");
            } else if macro_name == ".HP" {
                // Hanging paragraph; the indent defaults to 2.5em.
                let mut lp2 = lp;
                let indent = parse_measurement(&mut lp2, 'n')
                    .unwrap_or_else(|| "2.5em".into());
                close_link_if(state, &mut stdout);
                close_block_if(state, &mut stdout);
                write!(
                    stdout,
                    " <p style=\"margin-left: {indent}; text-indent: -{indent};\">"
                )
                .ok();
                state.in_block = Some("p");
            } else if macro_name == ".I" {
                // Italic text; with no argument the next input line is used.
                let mut remainder = lp.to_owned();
                if remainder.trim().is_empty() {
                    if let Some(next_line) = reader.man_gets() {
                        remainder = next_line;
                    }
                }
                push_font(state, Font::Italic, &mut stdout);
                man_puts(state, &remainder, &mut stdout);
                pop_font(state, &mut stdout);
                writeln!(stdout, "{break_text}").ok();
                break_text.clear();
            } else if macro_name == ".IB" {
                // Alternating italic / bold words.
                handle_xx(
                    state,
                    Font::Italic,
                    Font::Bold,
                    &mut reader,
                    lp,
                    &mut break_text,
                    &mut stdout,
                    basepath,
                );
            } else if macro_name == ".IP" {
                // Indented paragraph, rendered as a list item.
                let mut lp2 = lp;
                let tag = parse_value(&mut lp2).unwrap_or_default();
                let indent = parse_measurement(&mut lp2, 'n')
                    .unwrap_or_else(|| "2.5em".into());
                close_link_if(state, &mut stdout);
                // Reuse an open `ul`; close any other kind of block.
                if let Some(block) = state.in_block {
                    if block != "ul" {
                        writeln!(stdout, "</{block}>").ok();
                        state.in_block = None;
                    }
                }
                if state.in_block.is_none() {
                    writeln!(stdout, " <ul>").ok();
                    state.in_block = Some("ul");
                }
                // Bullet-like tags keep the default list marker.
                let list_style = if tag == "\\(bu" || tag == "-" || tag == "*" {
                    ""
                } else {
                    "list-style-type: none; "
                };
                write!(
                    stdout,
                    " <li style=\"{list_style}margin-left: {indent};\">"
                )
                .ok();
            } else if macro_name == ".IR" {
                // Alternating italic / regular words.
                handle_xx(
                    state,
                    Font::Italic,
                    Font::Regular,
                    &mut reader,
                    lp,
                    &mut break_text,
                    &mut stdout,
                    basepath,
                );
            } else if macro_name == ".LP"
                || macro_name == ".P"
                || macro_name == ".PP"
            {
                // New paragraph.
                close_link_if(state, &mut stdout);
                close_block_if(state, &mut stdout);
                write!(stdout, " <p>").ok();
                state.in_block = Some("p");
            } else if macro_name == ".ME" || macro_name == ".UE" {
                // End of a mail or URL link.
                if state.in_link {
                    writeln!(stdout, "</a>").ok();
                    state.in_link = false;
                }
            } else if macro_name == ".MT" {
                // Start of a mailto link; ignored when no address is given.
                let mut lp2 = lp;
                let email = parse_value(&mut lp2).unwrap_or_default();
                if !email.is_empty() {
                    write!(
                        stdout,
                        "<a href=\"mailto:{}\">",
                        html_escape(&email)
                    )
                    .ok();
                    state.in_link = true;
                }
            } else if macro_name == ".RB" {
                // Alternating regular / bold words.
                handle_xx(
                    state,
                    Font::Regular,
                    Font::Bold,
                    &mut reader,
                    lp,
                    &mut break_text,
                    &mut stdout,
                    basepath,
                );
            } else if macro_name == ".RE" {
                // End of a relative indent opened by `.RS`.
                if state.indent > 0 {
                    writeln!(stdout, " </div>").ok();
                    state.indent -= 1;
                } else {
                    eprintln!(
                        "mantohtml: Unbalanced '.RE' on line {} of '{}'.",
                        reader.line_number(),
                        filename
                    );
                }
            } else if macro_name == ".RS" {
                // Start of a relative indent; defaults to 0.5in.
                let mut lp2 = lp;
                let indent = parse_measurement(&mut lp2, 'n')
                    .unwrap_or_else(|| "0.5in".into());
                writeln!(stdout, " <div style=\"margin-left: {indent};\">")
                    .ok();
                state.indent += 1;
            } else if macro_name == ".SB" {
                // Small bold text; with no argument the next line is used.
                let mut remainder = lp.to_owned();
                if remainder.trim().is_empty() {
                    if let Some(next_line) = reader.man_gets() {
                        remainder = next_line;
                    }
                }
                push_font(state, Font::SmallBold, &mut stdout);
                man_puts(state, &remainder, &mut stdout);
                pop_font(state, &mut stdout);
                writeln!(stdout, "{break_text}").ok();
                break_text.clear();
            } else if macro_name == ".SH" {
                // Section heading.
                close_link_if(state, &mut stdout);
                close_block_if(state, &mut stdout);
                html_heading(state, Heading::Section, lp, &mut stdout);
            } else if macro_name == ".SM" {
                // Small text; with no argument the next line is used.
                let mut remainder = lp.to_owned();
                if remainder.trim().is_empty() {
                    if let Some(next_line) = reader.man_gets() {
                        remainder = next_line;
                    }
                }
                push_font(state, Font::Small, &mut stdout);
                man_puts(state, &remainder, &mut stdout);
                pop_font(state, &mut stdout);
                writeln!(stdout, "{break_text}").ok();
                break_text.clear();
            } else if macro_name == ".SS" {
                // Subsection heading.
                close_link_if(state, &mut stdout);
                close_block_if(state, &mut stdout);
                html_heading(state, Heading::Subsection, lp, &mut stdout);
            } else if macro_name == ".SY" {
                // Synopsis start: a monospace paragraph (closed by `.YS`).
                if let Some(block) = state.in_block.take() {
                    writeln!(stdout, "</{block}>").ok();
                }
                write!(stdout, " <p style=\"font-family: monospace;\">")
                    .ok();
                state.in_block = Some("p");
            } else if macro_name == ".TP" {
                // Tagged paragraph: hanging indent, and a "<br>" is queued
                // to follow the next emitted line (the tag).
                let mut lp2 = lp;
                let indent = parse_measurement(&mut lp2, 'n')
                    .unwrap_or_else(|| "2.5em".into());
                close_link_if(state, &mut stdout);
                close_block_if(state, &mut stdout);
                write!(
                    stdout,
                    " <p style=\"margin-left: {indent}; text-indent: -{indent};\">"
                )
                .ok();
                state.in_block = Some("p");
                break_text = "<br>".into();
            } else if macro_name == ".UR" {
                // Start of a URL link; ignored when no URL is given.
                let mut lp2 = lp;
                let url = parse_value(&mut lp2).unwrap_or_default();
                if !url.is_empty() {
                    write!(stdout, "<a href=\"{}\">", html_escape(&url)).ok();
                    state.in_link = true;
                }
            } else if macro_name == ".YS" {
                // Synopsis end: closes the current paragraph.
                if state.in_block == Some("p") {
                    writeln!(stdout, "</p>").ok();
                    state.in_block = None;
                } else {
                    eprintln!(
                        "mantohtml: '.YS' seen without prior '.SY' on line {} of '{}'.",
                        reader.line_number(),
                        filename
                    );
                }
            } else if macro_name == ".br" {
                // Line break.
                writeln!(stdout, "<br>").ok();
            } else if macro_name == ".in" {
                // With a measurement: open an indent; without: close one.
                let mut lp2 = lp;
                if let Some(indent) = parse_measurement(&mut lp2, 'm') {
                    writeln!(
                        stdout,
                        " <div style=\"margin-left: {};\">",
                        html_escape(&indent)
                    )
                    .ok();
                    state.indent += 1;
                } else if state.indent > 0 {
                    writeln!(stdout, " </div>").ok();
                    state.indent -= 1;
                } else {
                    eprintln!(
                        "mantohtml: '.in' seen without prior '.in INDENT' on line {} of '{}'.",
                        reader.line_number(),
                        filename
                    );
                }
            } else if macro_name == ".sp" {
                // Vertical space.
                writeln!(stdout, "<br> <br>").ok();
            } else {
                eprintln!(
                    "mantohtml: Unsupported command/macro '{}' on line {} of '{}'.",
                    macro_name,
                    reader.line_number(),
                    filename
                );
            }
        } else if th_seen {
            // Plain text line: open a paragraph if no block is open.
            if state.in_block.is_none() {
                write!(stdout, "<p>").ok();
                state.in_block = Some("p");
            }
            man_puts(state, &line, &mut stdout);
            writeln!(stdout, "{break_text}").ok();
            break_text.clear();
        } else if !line.is_empty() && !warning {
            // Text before `.TH` is dropped, with a single diagnostic.
            eprintln!(
                "mantohtml: Ignoring text before '.TH' on line {} of '{}'.",
                reader.line_number(),
                filename
            );
            warning = true;
        }
    }

    // Close the document if a header was written.
    if state.wrote_header {
        html_footer(state, &mut stdout);
    }

    stdout.flush().ok();
    Ok(())
}
617
/// Line-oriented reader over raw man-page bytes that joins continued
/// lines and strips `\"` comments.
struct ManReader<'a> {
    // Complete input.
    data: &'a [u8],
    // Offset of the next unread byte.
    pos: usize,
    // Number of newlines consumed so far.
    line: usize,
}

impl<'a> ManReader<'a> {
    /// Creates a reader positioned at the start of `data`.
    fn new(data: &'a [u8]) -> Self {
        Self {
            data,
            pos: 0,
            line: 0,
        }
    }

    /// Number of newlines consumed so far.
    fn line_number(&self) -> usize {
        self.line
    }

    /// Returns the next byte and advances, or `None` at end of input.
    fn getc(&mut self) -> Option<u8> {
        let byte = self.data.get(self.pos).copied()?;
        self.pos += 1;
        Some(byte)
    }

    /// Steps back one byte; does nothing at the start of the input.
    fn ungetc(&mut self) {
        self.pos = self.pos.saturating_sub(1);
    }

    /// Reads the next logical line, without its trailing newline.
    ///
    /// A backslash followed by a newline joins the next physical line; a
    /// `\"` sequence discards the rest of the physical line. Every other
    /// backslash escape is kept verbatim. Returns `None` once the input is
    /// exhausted and nothing was collected.
    fn man_gets(&mut self) -> Option<String> {
        let mut collected: Vec<u8> = Vec::new();

        loop {
            let Some(byte) = self.getc() else { break };
            match byte {
                b'\n' => {
                    self.line += 1;
                    break;
                }
                b'\\' => match self.getc() {
                    // A lone backslash at end of input is dropped.
                    None => break,
                    // Line continuation: swallow the newline and go on.
                    Some(b'\n') => self.line += 1,
                    // Comment: skip up to and including the next newline.
                    Some(b'"') => {
                        let rest = &self.data[self.pos..];
                        match rest.iter().position(|&b| b == b'\n') {
                            Some(offset) => {
                                self.pos += offset + 1;
                                self.line += 1;
                            }
                            None => self.pos = self.data.len(),
                        }
                        break;
                    }
                    Some(escaped) => {
                        collected.extend_from_slice(&[b'\\', escaped]);
                    }
                },
                other => collected.push(other),
            }
        }

        let at_end = self.pos >= self.data.len();
        if at_end && collected.is_empty() {
            return None;
        }
        Some(String::from_utf8_lossy(&collected).into_owned())
    }
}
696
/// Escapes `s` for use in HTML text and double-quoted attribute values.
///
/// `&`, `<` and `"` are replaced by `&amp;`, `&lt;` and `&quot;`; every
/// other character, including `>`, is copied unchanged.
fn html_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '"' => out.push_str("&quot;"),
            _ => out.push(ch),
        }
    }
    out
}
716
/// Derives an anchor identifier from heading text.
///
/// ASCII letters and digits are lower-cased and kept, `.` and `-` are kept,
/// and `(`, space or tab becomes a single `-` unless it is the last byte
/// or the anchor already ends in `-`. Every other byte is dropped.
/// Scanning stops once the anchor is 255 bytes long.
fn html_anchor(s: &str) -> String {
    const MAX_ANCHOR_LEN: usize = 255;

    let last_index = s.len().saturating_sub(1);
    let mut anchor = String::new();

    for (index, byte) in s.bytes().enumerate() {
        match byte {
            b if b.is_ascii_alphanumeric() || b == b'.' || b == b'-' => {
                anchor.push(char::from(b.to_ascii_lowercase()));
            }
            b'(' | b' ' | b'\t'
                if index < last_index && !anchor.ends_with('-') =>
            {
                anchor.push('-');
            }
            _ => {}
        }
        if anchor.len() >= MAX_ANCHOR_LEN {
            break;
        }
    }

    anchor
}
737
738fn html_header(state: &mut State, topic: &str, w: &mut impl Write) {
739 if state.wrote_header {
740 return;
741 }
742 state.wrote_header = true;
743 writeln!(w, "<!DOCTYPE html>").ok();
744 writeln!(w, "<html>").ok();
745 writeln!(w, " <head>").ok();
746
747 if let Some(css) = &state.css {
748 if css.starts_with("http://") || css.starts_with("https://") {
749 writeln!(
750 w,
751 " <link rel=\"stylesheet\" type=\"text/css\" href=\"{}\">",
752 html_escape(css)
753 )
754 .ok();
755 } else {
756 writeln!(w, " <style><!--").ok();
757 if let Ok(text) = fs::read_to_string(css) {
758 write!(w, "{text}").ok();
759 }
760 writeln!(w, "--></style>").ok();
761 }
762 }
763 if let Some(author) = &state.author {
764 writeln!(
765 w,
766 " <meta name=\"author\" content=\"{}\">",
767 html_escape(author)
768 )
769 .ok();
770 }
771 if let Some(c) = &state.copyright {
772 writeln!(
773 w,
774 " <meta name=\"copyright\" content=\"{}\">",
775 html_escape(c)
776 )
777 .ok();
778 }
779 writeln!(
780 w,
781 " <meta name=\"creator\" content=\"{} v{} convert_man_troff_to_html\">",
782 crate_name!(),
783 crate_version!()
784 )
785 .ok();
786 if let Some(subject) = &state.subject {
787 writeln!(
788 w,
789 " <meta name=\"subject\" content=\"{}\">",
790 html_escape(subject)
791 )
792 .ok();
793 }
794 let title = state.title.as_deref().unwrap_or({
795 if topic.is_empty() {
796 "Documentation"
797 } else {
798 topic
799 }
800 });
801 writeln!(w, " <title>{}</title>", html_escape(title)).ok();
802 writeln!(w, " </head>").ok();
803 writeln!(w, " <body>").ok();
804
805 if let Some(chapter) = &state.chapter {
806 let anchor = html_anchor(chapter);
807 writeln!(w, " <h1 id=\"{}\">{}</h1>", anchor, html_escape(chapter))
808 .ok();
809 }
810}
811
812fn html_footer(state: &mut State, w: &mut impl Write) {
813 if state.wrote_header {
814 writeln!(w, " </body>").ok();
815 writeln!(w, "</html>").ok();
816 state.wrote_header = false;
817 }
818}
819
820fn push_font(state: &mut State, font: Font, w: &mut impl Write) {
821 if state.font == font && state.in_block.is_some() {
822 return;
823 }
824 if state.font != Font::Regular {
826 match state.font {
827 Font::Bold => write!(w, "</strong>").ok(),
828 Font::Italic => write!(w, "</em>").ok(),
829 Font::Small => write!(w, "</small>").ok(),
830 Font::SmallBold => write!(w, "</small>").ok(),
831 Font::Monospace => write!(w, "</pre>").ok(),
832 Font::Regular => Some(()),
833 };
834 }
835 if state.in_block.is_none() {
836 write!(w, "<p>").ok();
837 state.in_block = Some("p");
838 }
839 match font {
840 Font::Regular => Some(()),
841 Font::Bold => write!(w, "<strong>").ok(),
842 Font::Italic => write!(w, "<em>").ok(),
843 Font::Small => write!(w, "<small>").ok(),
844 Font::SmallBold => {
845 write!(w, "<small style=\"font-weight: bold;\">").ok()
846 }
847 Font::Monospace => write!(w, "<pre>").ok(),
848 };
849 state.font = font;
850}
851
/// Returns to the regular font by delegating to `push_font` with
/// `Font::Regular`, which closes the tag of the current font.
fn pop_font(state: &mut State, w: &mut impl Write) {
    push_font(state, Font::Regular, w);
}
855
856fn html_heading(
857 state: &mut State,
858 heading: Heading,
859 raw: &str,
860 w: &mut impl Write,
861) {
862 let mut title = raw.trim().to_string();
864 if heading != Heading::Topic {
865 title = capitalize_heading_words(&title);
866 }
867
868 if state.in_link {
869 writeln!(w, "</a>").ok();
870 state.in_link = false;
871 }
872 if let Some(block) = state.in_block.take() {
873 writeln!(w, "</{block}>").ok();
874 }
875
876 let heading_u8: u8 = heading.into();
877 let hlevel = if state.chapter.is_some() {
878 heading_u8 + 2
879 } else {
880 heading_u8 + 1
881 };
882
883 match heading {
884 Heading::Topic => {
885 state.atopic = html_anchor(raw);
886 write!(
887 w,
888 " <h{} id=\"{}\">",
889 hlevel,
890 html_escape(&state.atopic)
891 )
892 .ok();
893 }
894 Heading::Section => {
895 state.asection = html_anchor(raw);
896 write!(
897 w,
898 " <h{} id=\"{}.{}\">",
899 hlevel,
900 html_escape(&state.atopic),
901 html_escape(&state.asection)
902 )
903 .ok();
904 }
905 Heading::Subsection => {
906 let subsection = html_anchor(raw);
907 write!(
908 w,
909 " <h{} id=\"{}.{}.{}\">",
910 hlevel,
911 html_escape(&state.atopic),
912 html_escape(&state.asection),
913 html_escape(&subsection)
914 )
915 .ok();
916 }
917 }
918
919 man_puts(state, &title, w);
920 writeln!(w, "</h{hlevel}>").ok();
921}
922
/// Converts heading text to title case.
///
/// Each run of ASCII letters has its first letter upper-cased and the rest
/// lower-cased. A word that already reads exactly `a`, `and`, `or` or `the`
/// in lower case and is followed by a space keeps its lower-case initial,
/// unless it starts the string. Non-letter bytes are left untouched.
fn capitalize_heading_words(s: &str) -> String {
    const MINOR_WORDS: [&[u8]; 4] = [b"a ", b"and ", b"or ", b"the "];

    let mut bytes = s.as_bytes().to_vec();
    let mut pos = 0;

    while pos < bytes.len() {
        if !bytes[pos].is_ascii_alphabetic() {
            pos += 1;
            continue;
        }

        let keep_lowercase = pos != 0
            && MINOR_WORDS.iter().any(|word| bytes[pos..].starts_with(word));
        if !keep_lowercase {
            bytes[pos].make_ascii_uppercase();
        }

        // Lower-case the remainder of this run of letters.
        let word_end = bytes[pos + 1..]
            .iter()
            .position(|b| !b.is_ascii_alphabetic())
            .map_or(bytes.len(), |offset| pos + 1 + offset);
        bytes[pos + 1..word_end].make_ascii_lowercase();

        pos = word_end;
    }

    // Only ASCII letters were modified, so the bytes are still valid UTF-8.
    String::from_utf8(bytes).expect("Should be valid UTF-8")
}
960
/// Handles the alternating-font macros (`.BI`, `.BR`, `.IB`, `.IR`, `.RB`).
///
/// The words on the macro line are written alternately in font `a` and
/// font `b`, starting with `a`. When the macro line has no arguments the
/// next input line supplies the words.
///
/// For the bold/regular combination, a word in font `a` that is followed by
/// a word of the form `(N...)` with `N` starting with a digit is turned
/// into a link to `{word}.html`, provided a base path is known and the
/// file `{basepath}/{word}.{section}` exists. The section word is then
/// written in font `b` inside the same link.
///
/// The font active on entry is restored afterwards, and any pending
/// `break_text` is written and cleared.
fn handle_xx(
    state: &mut State,
    a: Font,
    b: Font,
    reader: &mut ManReader,
    lp: &str,
    break_text: &mut String,
    w: &mut impl Write,
    basepath: Option<&str>,
) {
    // Fall back to the next input line when the macro has no arguments.
    let mut line_rest = lp.to_owned();
    if line_rest.trim().is_empty() {
        if let Some(next_line_r) = reader.man_gets() {
            line_rest = next_line_r;
        }
    }
    // Split the arguments into words, honouring quoting.
    let mut line_rest = line_rest.as_str();
    let mut words = Vec::new();
    {
        while let Some(val) = parse_value(&mut line_rest) {
            words.push(val);
        }
    }

    let have_basepath = basepath.is_some();
    let basepath = basepath.unwrap_or("");
    let original_font = state.font;
    // True when the next word is written in font `a`.
    let mut use_a = true;
    let mut idx = 0;
    while idx < words.len() {
        let word = &words[idx];
        let mut have_link = false;
        // Cross-reference detection: "name" followed by "(section...)".
        if a == Font::Bold
            && b == Font::Regular
            && use_a
            && idx + 1 < words.len()
            && words[idx + 1].starts_with('(')
            && words[idx + 1].contains(')')
        {
            let sec = &words[idx + 1];
            if let Some(endp) = sec.find(')') {
                let section = &sec[1..endp];
                if section.chars().next().is_some_and(|c| c.is_ascii_digit()) {
                    if have_basepath {
                        // Link only to pages that exist next to the input.
                        let manfile = format!("{basepath}/{word}.{section}");
                        if Path::new(&manfile).exists() {
                            write!(
                                w,
                                "<a href=\"{}.html\">",
                                html_escape(word)
                            )
                            .ok();
                            have_link = true;
                        }
                    }
                }
            }
        }

        push_font(state, if use_a { a } else { b }, w);
        man_puts(state, word, w);

        if have_link {
            // Emit the "(section)" word inside the link; the alternation
            // is not toggled, so the word after it uses font `a` again.
            if idx + 1 < words.len() {
                idx += 1;
                push_font(state, b, w);
                man_puts(state, &words[idx], w);
            }
            write!(w, "</a>").ok();
        } else {
            use_a = !use_a;
        }
        idx += 1;
    }

    push_font(state, original_font, w);
    writeln!(w).ok();
    if !break_text.is_empty() {
        writeln!(w, "{break_text}").ok();
        break_text.clear();
    }
    writeln!(w).ok();
}
1049
1050fn flush_fragment(start: usize, end: usize, w: &mut impl Write, bytes: &[u8]) {
1055 if end > start {
1056 let slice = &bytes[start..end];
1057 let text = String::from_utf8_lossy(slice);
1058 write!(w, "{}", html_escape(&text)).ok();
1059 }
1060}
1061
1062fn man_puts(state: &mut State, s: &str, w: &mut impl Write) {
1063 let mut i = 0usize;
1065 let bytes = s.as_bytes();
1066 let mut fragment_start = 0usize;
1067
1068 while i < bytes.len() {
1069 if bytes[i] == b'\\' && i + 1 < bytes.len() {
1070 flush_fragment(fragment_start, i, w, bytes);
1071 i += 1;
1072 let c = char::from(bytes[i]);
1073 match c {
1074 'f' => {
1075 if i + 1 < bytes.len() {
1076 i += 1;
1077 let fch = char::from(bytes[i]);
1078 match fch {
1079 'R' | 'P' => push_font(state, Font::Regular, w),
1080 'b' | 'B' => push_font(state, Font::Bold, w),
1081 'i' | 'I' => push_font(state, Font::Italic, w),
1082 _ => {
1083 eprintln!(
1084 "mantohtml: Unknown font '\\f{fch}' ignored."
1085 );
1086 }
1087 }
1088 i += 1;
1089 } else {
1090 i += 1;
1091 }
1092 }
1093 '*' => {
1094 i += 1;
1095 if i < bytes.len() {
1096 let ch = char::from(bytes[i]);
1097 if ch == '(' {
1098 i += 1;
1099 if i + 1 < bytes.len() {
1100 let m1 = char::from(bytes[i]);
1101 let m2 = char::from(bytes[i + 1]);
1102 let macro_id = format!("{m1}{m2}");
1103 match macro_id.as_str() {
1104 "aq" => write!(w, "'").ok(),
1105 "dq" => write!(w, """).ok(),
1106 "lq" => write!(w, "“").ok(),
1107 "rq" => write!(w, "”").ok(),
1108 "Tm" => write!(w, "<sup>TM</sup>").ok(),
1109 #[allow(clippy::unit_arg)]
1110 _ => Some(eprintln!(
1111 "mantohtml: Unknown macro '\\*({m1}{m2})' ignored."
1112 )),
1113 };
1114 i += 2;
1115 }
1116 } else {
1117 match ch {
1118 'R' => write!(w, "®").ok(),
1119 #[allow(clippy::unit_arg)]
1120 _ => Some(eprintln!(
1121 "mantohtml: Unknown macro '\\*{ch}' ignored."
1122 )),
1123 };
1124 i += 1;
1125 }
1126 }
1127 }
1128 '(' => {
1129 if i + 2 < bytes.len() {
1131 let seq = &bytes[i..i + 3];
1132 let token = String::from_utf8_lossy(seq);
1133 match token.as_ref() {
1134 "(bu" => write!(w, "·").ok(),
1135 "(em" => write!(w, "—").ok(),
1136 "(en" => write!(w, "–").ok(),
1137 "(ga" => write!(w, "`").ok(),
1138 "(ha" => write!(w, "^").ok(),
1139 "(ti" => write!(w, "~").ok(),
1140 _ => write!(w, "{}", html_escape(&token)).ok(),
1141 };
1142 i += 3;
1143 } else {
1144 write!(w, "(").ok();
1146 i += 1;
1147 }
1148 }
1149 '[' => {
1150 let mut j = i;
1152 while j < bytes.len() && bytes[j] != b']' {
1153 j += 1;
1154 }
1155 if j < bytes.len() && bytes[j] == b']' {
1156 let token = &s[i + 1..j]; match token {
1159 "aq" => write!(w, "'").ok(),
1160 "co" => write!(w, "©").ok(),
1161 "cq" => write!(w, "’").ok(),
1162 "de" => write!(w, "°").ok(),
1163 "dq" => write!(w, """).ok(),
1164 "lq" => write!(w, "“").ok(),
1165 "mc" => write!(w, "μ").ok(),
1166 "oq" => write!(w, "‘").ok(),
1167 "rg" => write!(w, "®").ok(),
1168 "rq" => write!(w, "”").ok(),
1169 "tm" => write!(w, "<sup>TM</sup>").ok(),
1170 _ => write!(w, "\\[{}]", html_escape(token)).ok(),
1172 };
1173 i = j + 1;
1174 } else {
1175 write!(w, "\\[").ok();
1177 i += 1;
1178 }
1179 }
1180 'e' => {
1181 write!(w, "\\").ok();
1183 i += 1;
1184 }
1185 d if d.is_ascii_digit() => {
1187 let start = i;
1188 let mut num = String::new();
1189 num.push(d);
1190 i += 1;
1191 if i < bytes.len() && char::from(bytes[i]).is_ascii_digit()
1192 {
1193 num.push(char::from(bytes[i]));
1194 i += 1;
1195 }
1196 if i < bytes.len() && char::from(bytes[i]).is_ascii_digit()
1197 {
1198 num.push(char::from(bytes[i]));
1199 i += 1;
1200 }
1201 if let Ok(v) = i32::from_str_radix(&num, 8) {
1202 write!(w, "&#{v};").ok();
1203 }
1204 }
1205 other => {
1206 match other {
1208 '\\' | '"' | '\'' | '-' | ' ' => {
1209 write!(w, "{other}").ok();
1210 }
1211 _ => {
1212 eprintln!(
1213 "mantohtml: Unrecognized escape '\\{other}' ignored."
1214 );
1215 write!(w, "\\{other}").ok();
1216 }
1217 }
1218 i += 1;
1219 }
1220 }
1221 fragment_start = i;
1222 } else if starts_with_url(&bytes[i..]) {
1223 flush_fragment(fragment_start, i, w, bytes);
1224 let (url, consumed) = extract_url(&bytes[i..]);
1225 write!(
1226 w,
1227 "<a href=\"{}\">{}</a>",
1228 html_escape(&url),
1229 html_escape(&url)
1230 )
1231 .ok();
1232 i += consumed;
1233 fragment_start = i;
1234 } else if matches!(bytes[i], b'<' | b'&' | b'"') {
1235 flush_fragment(fragment_start, i, w, bytes);
1236 let ch = char::from(bytes[i]);
1237 write!(w, "{}", html_escape(&ch.to_string())).ok();
1238 i += 1;
1239 fragment_start = i;
1240 } else {
1241 i += 1;
1242 }
1243 }
1244 flush_fragment(fragment_start, i, w, bytes);
1245}
1246
/// Reports whether `slice` is valid UTF-8 that begins with `http://` or
/// `https://`.
///
/// The cheap prefix comparison runs first, so the whole slice is validated
/// as UTF-8 only when a URL scheme is actually present. `man_puts` calls
/// this for nearly every byte of its input, and validating the remainder
/// each time made that scan quadratic.
fn starts_with_url(slice: &[u8]) -> bool {
    let has_scheme =
        slice.starts_with(b"http://") || slice.starts_with(b"https://");
    has_scheme && std::str::from_utf8(slice).is_ok()
}
1251
/// Extracts the URL at the start of `slice`.
///
/// The URL ends at the first whitespace character, or at a `,`, `.` or `)`
/// that is the last character or is followed by whitespace or NUL. Returns
/// the URL and its length in bytes; input that is not valid UTF-8 yields
/// an empty URL and a length of 0.
fn extract_url(slice: &[u8]) -> (String, usize) {
    let text = std::str::from_utf8(slice).unwrap_or("");
    let mut url_len = 0usize;
    let mut remaining = text.chars().peekable();

    while let Some(ch) = remaining.next() {
        if ch.is_whitespace() {
            break;
        }
        if matches!(ch, ',' | '.' | ')') {
            // Trailing punctuation belongs to the sentence, not the URL.
            let ends_url = match remaining.peek() {
                None => true,
                Some(&next) => next.is_whitespace() || next == '\0',
            };
            if ends_url {
                break;
            }
        }
        url_len += ch.len_utf8();
    }

    (text[..url_len].to_owned(), url_len)
}
1274
/// Parses the next whitespace-delimited or double-quoted value from `line`
/// and advances `line` past it and any following whitespace.
///
/// Inside quotes a backslash is dropped and the character after it is taken
/// literally; the value ends at the closing quote or the end of the line.
/// Outside quotes a backslash is kept together with the character after it,
/// even when that character is whitespace. Returns `None` when only
/// whitespace remains.
fn parse_value(line: &mut &str) -> Option<String> {
    let input = line.trim_start();
    if input.is_empty() {
        return None;
    }

    let mut value = String::new();
    let rest = if let Some(quoted) = input.strip_prefix('"') {
        let mut chars = quoted.chars();
        loop {
            match chars.next() {
                // Closing quote (consumed) or end of input.
                None | Some('"') => break,
                Some('\\') => {
                    if let Some(escaped) = chars.next() {
                        value.push(escaped);
                    }
                }
                Some(ch) => value.push(ch),
            }
        }
        chars.as_str()
    } else {
        let mut chars = input.chars();
        // Peek through a clone so the delimiter is left unconsumed.
        while let Some(ch) = chars.clone().next() {
            if ch.is_whitespace() {
                break;
            }
            chars.next();
            value.push(ch);
            if ch == '\\' {
                if let Some(escaped) = chars.next() {
                    value.push(escaped);
                }
            }
        }
        chars.as_str()
    };

    *line = rest.trim_start();
    Some(value)
}
1322
1323fn parse_measurement(line: &mut &str, defunit: char) -> Option<String> {
1324 let val = parse_value(line)?;
1325 if val.is_empty() {
1326 return None;
1327 }
1328 let s = val.clone();
1329 let unit = s
1330 .chars()
1331 .next_back()
1332 .filter(char::is_ascii_alphabetic)
1333 .unwrap_or(defunit);
1334
1335 let number_part =
1336 if s.chars().last().is_some_and(|c| c.is_ascii_alphabetic()) {
1337 s[..s.len() - 1].to_string()
1338 } else {
1339 s.clone()
1340 };
1341
1342 let parsed = number_part.parse::<f64>().unwrap_or(0.0);
1343
1344 let converted = match unit {
1345 'c' => format!("{number_part}cm"),
1346 'f' => format!("{:.1}%", 100.0 * parsed / 65536.0),
1347 'i' => format!("{number_part}in"),
1348 'm' => format!("{number_part}em"),
1349 'M' => format!("{:.2}em", parsed * 0.01),
1350 'n' => format!("{}em", parsed * 0.5),
1351 'P' => format!("{number_part}pc"),
1352 'p' => format!("{number_part}pt"),
1353 's' => format!("{:.1}%", 100.0 * parsed),
1354 'u' => format!("{number_part}px"),
1355 'v' => number_part,
1356 _ => return None,
1357 };
1358
1359 Some(converted)
1360}
1361
1362fn close_block_if(state: &mut State, w: &mut impl Write) {
1367 if let Some(block) = state.in_block.take() {
1368 writeln!(w, "</{block}>").ok();
1369 }
1370}
1371
1372fn close_link_if(state: &mut State, w: &mut impl Write) {
1373 if state.in_link {
1374 writeln!(w, "</a>").ok();
1375 state.in_link = false;
1376 }
1377}
1378
/// Prints the command-line help to stdout, preceded on stderr by an
/// "Unknown option" line when `bad` names the offending option.
fn usage(bad: Option<&str>) {
    const HELP_LINES: [&str; 10] = [
        "Usage: mantohtml [OPTIONS] MAN-FILE [... MAN-FILE] >HTML-FILE",
        "Options:",
        " --author 'AUTHOR' Set author metadata",
        " --chapter 'CHAPTER' Set chapter (H1 heading)",
        " --copyright 'COPYRIGHT' Set copyright metadata",
        " --css CSS-FILE-OR-URL Use named stylesheet",
        " --help Show help",
        " --subject 'SUBJECT' Set subject metadata",
        " --title 'TITLE' Set output title",
        " --version Show version",
    ];

    if let Some(opt) = bad {
        eprintln!("Unknown option: {opt}");
    }
    for line in HELP_LINES {
        println!("{line}");
    }
}
1394
#[cfg(test)]
mod tests {
    use crate::formats::assert_vec_u8_ok_eq_no_warnings;
    use crate::storage::get_asset;

    use super::*;

    /// `&`, `<` and `"` become character references; `>` and ordinary text
    /// pass through unchanged.
    #[crate::ctb_test]
    fn test_html_escape() {
        assert_eq!(html_escape("Hello & World"), "Hello &amp; World");
        assert_eq!(html_escape("<tag>"), "&lt;tag>");
        assert_eq!(html_escape("\"quote\""), "&quot;quote&quot;");
        assert_eq!(html_escape("No special chars"), "No special chars");
    }

    /// Anchors are lower-cased, with separators collapsed to single dashes.
    #[crate::ctb_test]
    fn test_html_anchor() {
        assert_eq!(html_anchor("Section 1.2"), "section-1.2");
        assert_eq!(html_anchor("My-Function()"), "my-function-");
        assert_eq!(html_anchor("A B C"), "a-b-c");
        assert_eq!(html_anchor("!@#$%^&*()"), "-");
    }

    /// Words are title-cased; lower-case "a", "and", "or" and "the" keep
    /// their case except at the start of the heading.
    #[crate::ctb_test]
    fn test_capitalize_heading_words() {
        assert_eq!(
            capitalize_heading_words("the quick brown fox"),
            "The Quick Brown Fox"
        );
        assert_eq!(
            capitalize_heading_words("a tale of two cities"),
            "A Tale Of Two Cities"
        );
        assert_eq!(
            capitalize_heading_words("a tale or two cities"),
            "A Tale or Two Cities"
        );
        assert_eq!(
            capitalize_heading_words("and then there were none"),
            "And Then There Were None"
        );
    }

    /// Only http and https schemes count as URLs.
    #[crate::ctb_test]
    fn test_starts_with_url() {
        assert!(starts_with_url(b"http://example.com"));
        assert!(starts_with_url(b"https://example.com"));
        assert!(!starts_with_url(b"ftp://example.com"));
        assert!(!starts_with_url(b"Just some text"));
    }

    /// URLs end at whitespace and exclude trailing sentence punctuation.
    #[crate::ctb_test]
    fn test_extract_url() {
        let (url, len) =
            extract_url(b"http://example.com/path?query=1 more text");
        assert_eq!(url, "http://example.com/path?query=1");
        assert_eq!(len, url.len());

        let (url2, len2) = extract_url(b"https://example.com.");
        assert_eq!(url2, "https://example.com");
        assert_eq!(len2, url2.len());

        let (url3, len3) = extract_url(b"https://example.com)");
        assert_eq!(url3, "https://example.com");
        assert_eq!(len3, url3.len());
    }

    /// End-to-end conversion of the troff fixture must match the stored
    /// HTML and produce no warnings.
    #[crate::ctb_test]
    fn test_convert_from_fixture() {
        assert_vec_u8_ok_eq_no_warnings(
            &get_asset("fixtures/formats/troff/out.html")
                .expect("Could not get fixture"),
            convert_man_troff_to_html(
                get_asset("fixtures/formats/troff/in.1")
                    .expect("Could not get fixture"),
            ),
        );
    }
}
1475
1476