source: trunk/PrefsPane/Resources/man2html @ 1046

Revision 719, 18.6 KB checked in by speck, 3 years ago (diff)

Prefs panel editing of site-wide proxy javascript. ProxyScriptEditor?. Major refactoring (e.g. NamedEnum?, CodeEditHelper?) to support javascript editing in multiple windows.

  • Property svn:executable set to *
Line 
1#!/usr/bin/perl
2##---------------------------------------------------------------------------##
3##  File:
4##      @(#) man2html 1.2 97/08/12 12:57:30 @(#)
5##  Author:
6##      Earl Hood, ehood@medusa.acs.uci.edu
7##  Description:
8##      man2html is a Perl program to convert formatted nroff output
9##  to HTML.
10## 
11##  Recommend command-line options based on platform:
12##
13##  Platform        Options
14##  ---------------------------------------------------------------------
15##  c2mp            <None, the defaults should be okay>
16##  hp9000s700/800      -leftm 1 -topm 8
17##  sun4            -sun
18##  ---------------------------------------------------------------------
19##
20##---------------------------------------------------------------------------##
21##  Copyright (C) 1995-1997 Earl Hood, ehood@medusa.acs.uci.edu
22##
23##  This program is free software; you can redistribute it and/or modify
24##  it under the terms of the GNU General Public License as published by
25##  the Free Software Foundation; either version 2 of the License, or
26##  (at your option) any later version.
27## 
28##  This program is distributed in the hope that it will be useful,
29##  but WITHOUT ANY WARRANTY; without even the implied warranty of
30##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
31##  GNU General Public License for more details.
32## 
33##  You should have received a copy of the GNU General Public License
34##  along with this program; if not, write to the Free Software
35##  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
36##  02111-1307, USA
37##---------------------------------------------------------------------------##
38
39package Man2Html;
40
41use Getopt::Long;
42
## Program name (basename of $0) and program version
$PROG = $0;
$PROG =~ s{.*/}{};
$VERSION = "3.0.1";

## Input and output filehandles.  A handle that is already set is kept;
## otherwise the standard streams are used.
$InFH  ||= \*STDIN;
$OutFH ||= \*STDOUT;

## Backspace character:  Used in overstriking detection.
## $bs becomes an alias for the constant string "\b".
*bs = \"\b";
52
##  Map of section-title patterns to the HTML tag that wraps a matching
##  section head.  Edit this table to change which tag a given section
##  head receives.
##
##  Every key is used as a regular expression (anchored at the left
##  margin by do_it()), so regex metacharacters in a title must be quoted.
##
%SectionHead = (

    ## Top-level section heads
    (map { ($_ => '<H2>') }
        '\S.*OPTIONS.*',
        'AUTHORS?',
        'BUGS',
        'COMPATIBILITY',
        'DEPENDENCIES',
        'DESCRIPTION',
        'DIAGNOSTICS',
        'ENVIRONMENT',
        'ERRORS',
        'EXAMPLES',
        'EXTERNAL INFLUENCES',
        'FILES',
        'LIMITATIONS',
        'NAME',
        'NOTES?',
        'OPTIONS',
        'REFERENCES',
        'RETURN VALUE',
        'SECTION.*:',
        'SEE ALSO',
        'STANDARDS CONFORMANCE',
        'STYLE CONVENTION',
        'SYNOPSIS',
        'SYNTAX',
        'WARNINGS',
    ),

    ## Indented sub-section heads
    '\s+Section.*:' => '<H3>',

);
90
## Tag used for a detected section head that matches no %SectionHead entry
$HeadFallback = '<H2>';

## Other globals.  These are the defaults; get_cli_opts() overrides them
## from the command line.

## Boolean flags
$Bare      = 0;     # Skip printing of the HTML head/foot
$Compress  = 0;     # Compress consecutive blank lines
$K         = 0;     # Input is keyword search ('man -k') output
$NoDepage  = 0;     # Do not strip page information
$NoHeads   = 0;     # Do no section head detection
$SeeAlso   = 0;     # Create xrefs only in the SEE ALSO section
$Solaris   = 0;     # Keyword search output is in Solaris format
$Sun       = 0;     # Section heads are not overstruck in the input

## Text settings
$Title     = '';    # Title
$CgiUrl    = '';    # CGI URL expression
$BTag      = 'B';   # Element for overstruck text
$UTag      = 'I';   # Element for underlined text

## Page geometry
$pgsz      = 66;    # Number of lines in a page
$ftsz = $hdsz = 7;  # Bottom ($ftsz) and top ($hdsz) margin sizes
$txsz      = $pgsz - ($hdsz + $ftsz);  # Text body length (52)
$leftmsz   = 0;     # Left margin size
$leftm     = '';    # Left margin pad
114
115#############################################################################
116##  Main Block
117#############################################################################
{
    ## Bad options or -help: print the usage text (usage() exits).
    ## Otherwise convert keyword search output or a formatted manpage.
    if (!get_cli_opts()) {
        usage();
    }
    elsif ($K) {
        man_k();
    }
    else {
        do_it();
    }
}
129
130#############################################################################
131##  Subroutines
132#############################################################################
133
##  do_it() converts a formatted manpage read from $InFH into HTML
##  written to $OutFH.  Each page is treated as $hdsz header lines
##  (dropped), $txsz body lines (converted) and $ftsz footer lines
##  (dropped).  The HTML head/foot are printed unless $Bare is set.
##
sub do_it {

    ##  The conversion loop is built as a string and eval'ed.  The reason
    ##  is to avoid the regular expression reevaluation in the
    ##  section head detection code: one literal match statement is
    ##  generated per %SectionHead entry.

    $doitcode =<<'EndOfDoItCode';

    my($line, $tmp, $i, $head, $preindent, $see_also, $do);

    $see_also = !$SeeAlso;
    print $OutFH "<!-- Manpage converted by man2html $VERSION -->\n";
    LOOP: while(!eof($InFH)) {
        $blank = 0;

        ## Drop the page header lines
        for ($i=0; $i < $hdsz; $i++) {
            last LOOP  unless defined($_ = <$InFH>);
        }

        ## Convert the text body lines
        for ($i=0; $i < $txsz; $i++) {
            last LOOP  unless defined($_ = <$InFH>);

            ## Check if compress consecutive blank lines
            if ($Compress and !/\S/) {
                if ($blank) { next; } else { $blank = 1; }
            } else {
                $blank = 0;
            }

            ## Try to check if line space is needed at page boundaries ##
            if (!$NoDepage && ($i==0 || $i==($txsz-1)) && !/^\s*$/) {
                /^(\s*)/;  $tmp = length($1);
                if ($do) {
                    if ($tmp < $preindent) { print $OutFH "\n"; }
                } else {
                    $do = 1;
                }
                $preindent = $tmp;
            } else {
                $do = 0;  $preindent = 0;
            }

            ## Interpret line.  $line keeps the raw text for head detection.
            $line = $_;
            entitize(\$_);      # Convert [&<>] to entity references

            ## Check for 'SEE ALSO' link only
            if (!$see_also && $CgiUrl && $SeeAlso) {
                ($tmp = $line) =~ s/.\010//go;
                if ($tmp =~ /^\s*SEE\s+ALSO\s*$/o) { $see_also = 1; }
                else { $see_also = 0; }
            }

            ## Create anchor links for manpage references
            s/((((.\010)+)?[\+_\.\w-])+\(((.\010)+)?
              \d((.\010)+)?\w?\))
             /make_xref($1)
             /geox  if $see_also;

            ## Emphasize underlined words
            # s/((_\010[^_])+[\.\(\)_]?(_\010[^_])+\)?)/emphasize($1)/oge;
            # s/((_\010[^_])+([\.\(\)_]?(_\010[^_])+)?)/emphasize($1)/oge;
            #
            # The previous expressions were trying to be clever about
            # detecting underlined text which contain non-alphanumeric
            # characters.  nroff will not underline non-alphanumeric
            # characters in an underlined phrase, and the above was trying
            # to detect that.  It does not work all the time, and it
            # screws up other text, so a simplified expression is used.

            s/((_\010[^_])+)/emphasize($1)/oge;

            $secth = 0;
            ## Check for strong text and headings
            if ($Sun || /.\010./o) {
                if (!$NoHeads) {
                    $line =~ s/.\010//go;
                    $tmp = $HeadFallback;
EndOfDoItCode

    ##  Create switch statement for detecting a heading.  The margin pad
    ##  and each head pattern are pasted into the generated code as
    ##  literal regular expressions.
    ##
    $doitcode .= "HEADSW: {\n";
    foreach $head (keys %SectionHead) {
        $doitcode .= join("", "\$tmp = '$SectionHead{$head}', ",
                              "\$secth = 1, last HEADSW  ",
                              "if \$line =~ /^$leftm$head/o;\n");
    }
    $doitcode .= "}\n";

    ##  Rest of routine
    ##
    $doitcode .=<<'EndOfDoItCode';
                    if ($secth || $line =~ /^$leftm\S/o) {
                        ## chomp, not chop: a final line without a
                        ## newline must not lose its last character.
                        chomp $line;
                        ## $line is the raw input text, so it has to be
                        ## escaped before it is written out as a heading.
                        htmlize(\$line);
                        $_ = $tmp . $line . $tmp;
                        s%<([^>]*)>$%</$1>%;    # Turn trailing tag into end tag
                        $_ = "\n</PRE>\n" . $_ . "<PRE>\n";
                    } else {
                        s/(((.\010)+.)+)/strongize($1)/oge;
                    }
                } else {
                    s/(((.\010)+.)+)/strongize($1)/oge;
                }
            }
            print $OutFH $_;
        }

        ## Drop the page footer lines
        for ($i=0; $i < $ftsz; $i++) {
            last LOOP  unless defined($_ = <$InFH>);
        }
    }
EndOfDoItCode


    ##  Perform processing.

    printhead()  unless $Bare;
    print $OutFH "<PRE>\n";
    eval $doitcode;         # $doitcode defined above
    ##  A head pattern that is not a valid regular expression makes the
    ##  generated code fail to compile; report it instead of silently
    ##  producing an empty document.
    warn "$PROG: manpage conversion failed: $@"  if $@;
    print $OutFH "</PRE>\n";
    printtail()  unless $Bare;
}
255
##---------------------------------------------------------------------------
##  get_cli_opts() parses the command line with Getopt::Long and sets the
##  program globals from the resulting $opt_* variables.
##
##  Returns 1 on success.  Returns 0 if option parsing failed or -help
##  was given; the main block then prints the usage message.
##
sub get_cli_opts {
    return 0  unless
    GetOptions(
        "bare",         # Leave out HTML, HEAD, BODY tags.
        "belem=s",      # HTML Element for overstriked text (def: "B")
        "botm=i",       # Number of lines for bottom margin (def: 7)
        "cgiurl=s",     # CGI URL for linking to other manpages
        "cgiurlexp=s",  # CGI URL Perl expr for linking to other manpages
        "compress",     # Compress consecutive blank lines
        "headmap=s",    # Filename of user section head map file
        "k",            # Process input from 'man -k' output.
        "leftm=i",      # Character width of left margin (def: 0)
        "nodepage",     # Do not remove pagination lines
        "noheads",      # Do not detect for section heads
        "pgsize=i",     # Number of lines in a page (def: 66)
        "seealso",      # Link to other manpages only in the SEE ALSO section
        "solaris",      # Parse 'man -k' output from a solaris system
        "sun",          # Section heads are not overstriked in input
        "title=s",      # Title of manpage (def: Not defined)
        "topm=i",       # Number of lines for top margin (def: 7)
        "uelem=s",      # HTML Element for underlined text (def: "I")

        "help"          # Short usage message
    );
    return 0  if defined($opt_help);

    ## Page geometry.  With -nodepage there are no margins to strip, so
    ## the whole page is text body.
    $pgsz = $opt_pgsize || $pgsz;
    if (defined($opt_nodepage)) {
        $hdsz   = 0;
        $ftsz   = 0;
    } else {
        $hdsz   = $opt_topm  if defined($opt_topm);
        $ftsz   = $opt_botm  if defined($opt_botm);
    }
    $txsz       = $pgsz - ($hdsz + $ftsz);
    $leftmsz    = $opt_leftm  if defined($opt_leftm);
    $leftm      = ' ' x $leftmsz;

    ## Boolean flags
    $Bare       = defined($opt_bare);
    $Compress   = defined($opt_compress);
    $K          = defined($opt_k);
    $NoDepage   = defined($opt_nodepage);
    $NoHeads    = defined($opt_noheads);
    $SeeAlso    = defined($opt_seealso);
    $Solaris    = defined($opt_solaris);
    $Sun        = defined($opt_sun);

    ## -cgiurlexp is used as given; a plain -cgiurl is wrapped into a
    ## Perl expression returning the interpolated URL, so both forms can
    ## be eval'ed the same way later on.
    $Title      = $opt_title || $Title;
    $CgiUrl     = $opt_cgiurlexp ||
                  ($opt_cgiurl ? qq{return "$opt_cgiurl"} : '');

    ## Element names are stored without angle brackets
    $BTag   = $opt_belem || $BTag;
    $UTag   = $opt_uelem || $UTag;
    $BTag   =~ s/[<>]//g;
    $UTag   =~ s/[<>]//g;

    if (defined($opt_headmap)) {
        ## require() dies on failure instead of returning false, so it is
        ## run inside eval to make the warning below reachable.
        my $loaded = eval { require $opt_headmap; 1 };

        ## A bare relative filename is looked up through @INC, which no
        ## longer contains the current directory on newer perls.  Retry
        ## explicitly relative to the current directory.
        if (!$loaded && $opt_headmap !~ m{^\.{0,2}/} && -f $opt_headmap) {
            $loaded = eval { require "./$opt_headmap"; 1 };
        }
        warn "Unable to read $opt_headmap\n"  unless $loaded;
    }
    1;
}
319
##---------------------------------------------------------------------------
##  printhead() writes the opening HTML of the document to $OutFH.  The
##  HEAD element and the H1 title line are only written when $Title is
##  set.
##
sub printhead {
    print $OutFH "<HTML>\n";
    if ($Title) {
        print $OutFH "<HEAD>\n",
                     "<TITLE>$Title</TITLE>\n",
                     "</HEAD>\n";
    }
    print $OutFH "<BODY>\n";
    if ($Title) {
        print $OutFH "<H1>$Title</H1>\n",
                     "<HR>\n";
    }
}
330
##---------------------------------------------------------------------------
##  printtail() writes the closing HTML of the document to $OutFH: a
##  rule, the man2html credit and the BODY/HTML end tags.
##
sub printtail {
    my @footer = (
        '<HR>',
        '<ADDRESS>',
        'Man(1) output converted with',
        '<a href="http://www.oac.uci.edu/indiv/ehood/man2html.html">man2html</a>',
        '</ADDRESS>',
        '</BODY>',
        '</HTML>',
    );
    print $OutFH join('', map { "$_\n" } @footer);
}
343
##---------------------------------------------------------------------------
##  emphasize() removes the overstrike sequences (any character followed
##  by a backspace) from a string and returns the remaining text wrapped
##  in the underline element $UTag.
##
sub emphasize {
    my $plain = $_[0];
    $plain =~ s/.\010//g;
    return "<$UTag>" . $plain . "</$UTag>";
}
351
##---------------------------------------------------------------------------
##  strongize() removes the overstrike sequences (any character followed
##  by a backspace) from a string and returns the remaining text wrapped
##  in the overstrike element $BTag.
##
sub strongize {
    my $plain = $_[0];
    $plain =~ s/.\010//g;
    return "<$BTag>" . $plain . "</$BTag>";
}
359
##---------------------------------------------------------------------------
##  entitize() converts the HTML special characters &, < and > in a line
##  to entity references.  The line is passed by reference and modified
##  in place.
##
sub entitize {
    my($txt) = shift;
    my %entity = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');

    ## Check for special characters in overstrike text ##
    ## Underlined special characters
    $$txt =~ s/_\010\&/strike('_', '&')/ge;
    $$txt =~ s/_\010</strike('_', '<')/ge;
    $$txt =~ s/_\010>/strike('_', '>')/ge;

    ## Special characters struck over themselves
    $$txt =~ s/(\&\010)+\&/strike('&', '&')/ge;
    $$txt =~ s/(<\010)+</strike('<', '<')/ge;
    $$txt =~ s/(>\010)+>/strike('>', '>')/ge;

    ## Check for special characters in regular text.  A character that is
    ## next to a backspace belongs to an overstrike sequence and is left
    ## alone.  Lookarounds are used so that the neighbouring characters
    ## are only inspected, not consumed: every special character is
    ## converted, including ones at the start of the line and ones
    ## separated by a single character ("a&b&c").
    $$txt =~ s/(?<!\010)([&<>])(?!\010)/$entity{$1}/g;
}
380
##---------------------------------------------------------------------------
##  htmlize() replaces &, < and > with entity references.  The string is
##  passed by reference and changed in place; the changed string is also
##  returned.
##
sub htmlize {
    my $ref = $_[0];
    my %entity_for = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
    $$ref =~ s/([&<>])/$entity_for{$1}/g;
    return $$ref;
}
391
##---------------------------------------------------------------------------
##  htmlize2() returns a copy of a string with &, < and > replaced by
##  entity references.  Unlike htmlize() it takes the string by value and
##  leaves the caller's variable alone.
##
sub htmlize2 {
    my $copy = $_[0];
    my %entity_for = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
    $copy =~ s/([&<>])/$entity_for{$1}/g;
    return $copy;
}
402
##---------------------------------------------------------------------------
##  strike() returns the entity reference for an HTML special character
##  with every character of the entity overstruck, so that strongize()
##  and emphasize() still see overstruck text and wrap the whole entity
##  in tags.
##
##  $w is the overstrike style: '_' precedes each entity character with
##  "_<backspace>" (underline); any other value strikes each entity
##  character over itself.  $char is one of '&', '<' or '>'; for anything
##  else a warning is issued and undef is returned.
##
sub strike {
    my($w, $char) = @_;
    my %entity_for = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
    my($ret);

    if (!exists $entity_for{$char}) {
        warn qq|Unrecognized character, "$char", passed to strike()\n|;
    } elsif ($w eq '_') {
        $ret = join('', map { '_' . $bs . $_ } split(//, $entity_for{$char}));
    } else {
        $ret = join('', map { $_ . $bs . $_ } split(//, $entity_for{$char}));
    }
    $ret;
}
435
##---------------------------------------------------------------------------
##  make_xref() turns a manpage cross-reference such as "ls(1)" into bold
##  text.  When $CgiUrl is set, the text is also made a hyperlink.
##
##  $CgiUrl is evaluated as Perl code and can see the lexicals $title,
##  $section and $subsection, so those names must not be changed.
##
sub make_xref {
    my $str = shift;
    $str =~ s/.\010//g;             # Remove overstriking

    ## No URL expression: emphasis only
    return qq|<B>$str</B>|  unless $CgiUrl;

    my($title,$section,$subsection) =
        ($str =~ /([\+_\.\w-]+)\((\d)(\w?)\)/);

    $title =~ s/\+/%2B/g;           # '+' must be escaped inside the URL
    my($href) = (eval $CgiUrl);
    return qq|<B><A HREF="$href">$str</A></B>|;
}
454
##---------------------------------------------------------------------------
##  man_k() converts keyword search results read from $InFH into HTML
##  definition lists, one list per manual section.  There is no standard
##  format for keyword search results from man.  Solaris output differs
##  enough to warrant a special case ($Solaris); everything else is
##  handled on a best-effort basis, and lines that cannot be recognized
##  are skipped.
##
sub man_k {
    my(%desc_for, %subsec_for, %catchall_sec);
    my @numbered = (1 .. 9);

    ## One bucket per numbered section, plus 'N' for everything else
    foreach my $bucket (@numbered, 'N') {
        $desc_for{$bucket}   = {};
        $subsec_for{$bucket} = {};
    }

    printhead()  unless $Bare;
    print $OutFH "<!-- Man keyword results converted by ",
                 "man2html $VERSION -->\n";

    ENTRY: while (defined(my $line = <$InFH>)) {
        ## Only "<names> (<section>) - <description>" lines can be handled
        next ENTRY  unless $line =~ /\(\d\w?\)\s+-\s/;

        my($refs,$section,$subsection,$desc) =
            $line =~ /^\s*(.*)\((\d)(\w?)\)\s*-\s*(.*)$/;

        if ($Solaris) {
            ## "<topic> <manpage>": keep the topic
            $refs =~ s/^\s*([\+_\.\w-]+)\s+([\+_\.\w-]+)\s*$/$1/;
        } else {
            $refs =~ s/\s(and|or)\s/,/gi;   # Convert and/or to commas
            ## Remove prefixed whatis path.
            ## NOTE(review): as written this only removes a single
            ## character followed by ':' -- confirm that is intended.
            $refs =~ s/^[^:\s]:\s*//;
        }
        $refs =~ s/\s//g;               # Remove all whitespace
        $refs =~ s/,/, /g;              # Put space after comma
        htmlize(\$desc);                # Check for special chars in desc
        $desc =~ s/^(.)/\U$1/;          # Uppercase first letter in desc

        ## Sections 1-9 have their own bucket; anything else is collected
        ## in 'N' together with its real section number.
        my $bucket = ($section =~ /\A[1-9]\z/) ? $section : 'N';
        $desc_for{$bucket}{$refs}   = $desc;
        $subsec_for{$bucket}{$refs} = $subsection;
        $catchall_sec{$refs}        = $section  if $bucket eq 'N';
    }

    foreach my $n (@numbered) {
        print_mank_sec($desc_for{$n}, $n, $subsec_for{$n});
    }
    print_mank_sec($desc_for{'N'}, 'N', $subsec_for{'N'}, \%catchall_sec);

    printtail()  unless $Bare;
}
##---------------------------------------------------------------------------
##  print_mank_sec() prints out manpage cross-refs of a specific section
##  as a definition list on $OutFH.  Nothing is printed for a section
##  without entries.
##
##  Arguments:
##      $sec    : hash ref, comma separated manpage names => description
##      $sect   : section label (1-9, or 'N' for the catch-all section)
##      $secsub : hash ref, same keys as $sec => subsection
##      $secsec : hash ref, same keys as $sec => actual section number;
##                only consulted when $sect is 'N'
##
##  $CgiUrl is evaluated as Perl code and can see the lexicals $title,
##  $section and $subsection, so those names must not be changed.
##
sub print_mank_sec {
    my($sec, $sect, $secsub, $secsec) = @_;
    my(@array, @refs, $item, $title, $subsection, $section, $xref);
    $section = $sect;

    @array = sort keys %$sec;
    if (@array) {
        print $OutFH "<H2>Section $section</H2>\n",
                     "<DL COMPACT>\n";
        foreach $item (@array) {
            ## Keys look like "a, b, c".  Split on the comma AND the
            ## space after it, otherwise every name but the first starts
            ## with a blank that ends up in the output and the link text.
            @refs = split(/,\s*/, $item);
            $section = $secsec->{$item}  if $sect eq 'N';
            $subsection = $secsub->{$item};
            if ($CgiUrl) {
                ## All names of an entry link to the first name's page
                ($title = $refs[0]) =~ s/\(\)//g;  # watch out for extra ()'s
                $xref = eval $CgiUrl;
            }
            print $OutFH "<DT>\n";
            print $OutFH join(", ",
                map { $CgiUrl ? qq|<B><A HREF="$xref">$_</A></B>| : $_ }
                    @refs);
            print $OutFH " ($section$subsection)\n",
                         "</DT><DD>\n",
                         $sec->{$item}, "</DD>\n";
        }
        print $OutFH "</DL>\n";
    }
}
566
##---------------------------------------------------------------------------
##  usage() writes the option summary, a short description and the
##  version/copyright notice to $OutFH and then exits the program with
##  status 0.
##
sub usage {
    my $message = <<EndOfUsage;
Usage: $PROG [ options ] < infile > outfile
Options:
  -bare            : Do not put in HTML, HEAD, BODY tags
  -belem <elem>    : HTML Element for overstriked text (def: "B")
  -botm <#>        : Number of lines for bottom margin (def: 7)
  -cgiurl <url>    : URL for linking to other manpages
  -cgiurlexp <url> : Perl expression URL for linking to other manpages
  -compress        : Compress consective blank lines
  -headmap <file>  : Filename of user section head map file
  -help            : This message
  -k               : Process a keyword search result
  -leftm <#>       : Character width of left margin (def: 0)
  -nodepage        : Do not remove pagination lines
  -noheads         : Turn off section head detection
  -pgsize <#>      : Number of lines in a page (def: 66)
  -seealso         : Link to other manpages only in the SEE ALSO section
  -solaris         : Process keyword search result in Solaris format
  -sun             : Section heads are not overstriked in input
  -title <string>  : Title of manpage (def: Not defined)
  -topm <#>        : Number of lines for top margin (def: 7)
  -uelem <elem>    : HTML Element for underlined text (def: "I")

Description:
  $PROG takes formatted manpages from STDIN and converts it to HTML sent
  to STDOUT.  The -topm and -botm arguments are the number of lines to the
  main body text and NOT to the running headers/footers.

Version:
  $VERSION
  Copyright (C) 1995-1997  Earl Hood, ehood\@medusa.acs.uci.edu
  $PROG comes with ABSOLUTELY NO WARRANTY and $PROG may be copied only
  under the terms of the GNU General Public License, which may be found in
  the $PROG distribution.

EndOfUsage

    print $OutFH $message;
    exit 0;
}
Note: See TracBrowser for help on using the repository browser.