fix section heading heuristic
[c-standard] / tohtml.sh
#!/bin/sh
#
# tohtml.sh - convert a plain-text document with numbered sections into a
# single cross-linked HTML page.
#
# Usage: tohtml.sh < document.txt > document.html
#
# Input layout the script relies on:
#   * the first line is used as the HTML title;
#   * a line starting with "Contents" opens the table of contents, which
#     runs up to and including its "Index" entry;
#   * section headings start with an id such as "6.", "6.2.1", "Annex A",
#     "Index", "Foreword", "Introduction" or "Bibliography";
#   * page markers look like "[page N]".
# NOTE(review): this layout looks like a C standard draft in text form;
# confirm against the real input before changing any heuristic.

# Byte-wise matching keeps the bracket ranges used below locale independent.
export LC_ALL=C

# Read the text document on stdin and write the HTML page to stdout.
tohtml() {
	# Escape the HTML metacharacters first, so the only raw tags in the
	# output are the ones the awk program adds.
	sed 's/&/\&amp;/g;s/</\&lt;/g;s/>/\&gt;/g' | awk '
# BEGIN consumes the front matter and the table of contents with explicit
# getline calls; the pattern rules further down only see the lines after
# the "Index" entry of the table of contents.
BEGIN {
        # The first line is both the page title and the first body line.
        getline
        print "<html><head><title>" $0 "</title></head><body><pre>"
        print

        # Copy everything before the table of contents unchanged.
        while (getline == 1) {
                if ($0 ~ /^Contents/)
                        break
                print
        }
        print "<a name=\"Contents\" href=\"#Contents\">Contents</a>"

        # Table of contents: wrap each entry in a link to its section.
        while (getline == 1) {
                id = $1
                if (id ~ /Annex/)
                        id = $2
                if (id ~ /^([1-9A-Z]|Index|Foreword|Introduction|Bibliography)/) {
                        # Anchors are named without the trailing dot, so
                        # strip it before the id is used or remembered.
                        if (id ~ /\.$/)
                                id = substr(id,1,length(id)-1)
                        # Remember the first entry mentioning a header
                        # such as &lt;stdio.h&gt; so body text can link to it.
                        if (match($0, /&lt;[a-zA-Z0-9_]*\.h&gt;/)) {
                                h = substr($0,RSTART,RLENGTH)
                                if (!(h in header))
                                        header[h] = id
                        }
                        s = "<a href=\"#" id "\">" $0
                        if ($(NF-1) == ".")
                                # Dot leader right before the last field:
                                # the entry fits on this line.
                                print s "</a>"
                        else {
                                # The entry wraps onto the next line;
                                # close the link after that line.
                                print s
                                if (getline == 1)
                                        print $0 "</a>"
                                else
                                        # Input ended mid-entry: close the
                                        # link without repeating the line.
                                        print "</a>"
                        }
                        if (id == "Index")
                                break
                } else
                        print
        }
}

# Section headings: turn them into bold named anchors. Once the Index
# heading has been emitted no further headings are recognised.
!seenindex && /^ *([1-9A-Z]\.|Annex|Index|Foreword|Introduction|Bibliography)/ {
        id = $1
        if (id ~ /Annex/)
                id = $2
        # Heuristic: either a top-level heading indented by four spaces, or
        # an id made of a single capital letter, a dotted number, or one of
        # the named sections; a title, if present, must start with a
        # capital letter; and the line must not consist of digits and dots
        # followed by a single trailing non-digit character.
        if (($0 ~ /^    [1-9]\./ || id ~ /^([A-Z]|[1-9A-Z]\.[1-9][0-9.]*|Index|Foreword|Introduction|Bibliography)$/) &&
            (NF==1 || $2 ~ /^[A-Z]/) &&
            ($0 !~ /^ *[0-9.]+[^0-9]$/)) {
                if (id ~ /\.$/)
                        id = substr(id,1,length(id)-1)
                print "<a name=\"" id "\" href=\"#" id "\"><b>" $0 "</b></a>"
                if (id == "Index")
                        seenindex=1
                next
        }
        # Not a heading after all: fall through to the rules below.
}

# Page markers become named anchors with a link back to the contents.
/^\[page / {
        # $2 is the page number followed by the closing bracket.
        p = substr($2,1,length($2)-1)
        print "[<a name=\"p" p "\" href=\"#p" p "\">page " p "</a>] (<a href=\"#Contents\">Contents</a>)"
        next
}

# Every other line: link section references, annex references and header
# names. Each pass scans s left to right, moving processed text into p.
{
        s = $0
        p = ""
        # Inside the index a bare letter or number counts as a reference;
        # elsewhere a reference needs a dot and must end in a digit.
        if (seenindex)
                r = "[ (][A-Z1-9][0-9.]*"
        else
                r = "[ (][A-Z1-9]\\.[0-9.]*[0-9]"
        while (match(s, r)) {
                # Keep the text up to and including the leading " " or "(".
                p = p substr(s,1,RSTART)
                m = substr(s,RSTART+1,RLENGTH-1)
                # Leave the candidate as plain text when it contains a
                # two-digit run starting with 4-9 or a three-digit run
                # starting with 0-3 (presumably not section ids), or when
                # a letter or hyphen follows it directly.
                if (m ~ /[4-9][0-9]/ || m ~ /[0-3][0-9][0-9]/ || substr(s,RSTART+RLENGTH,1) ~ /[a-zA-Z-]/)
                        p = p m
                else
                        p = p "<a href=\"#" m "\">" m "</a>"
                s = substr(s,RSTART+RLENGTH)
        }
        s = p s
        p = ""
        # "annex X" / "Annex X" links to the anchor named by the letter.
        while (match(s, /[Aa]nnex [A-Z]/)) {
                p = p substr(s,1,RSTART-1)
                m = substr(s,RSTART,RLENGTH)
                p = p "<a href=\"#" substr(m,RLENGTH,1) "\">" m "</a>"
                s = substr(s,RSTART+RLENGTH)
        }
        s = p s
        p = ""
        # Header names link to the section recorded from the contents.
        while (match(s, /&lt;[a-zA-Z0-9_]*\.h&gt;/)) {
                p = p substr(s,1,RSTART-1)
                m = substr(s,RSTART,RLENGTH)
                if (m in header)
                        p = p "<a href=\"#" header[m] "\">" m "</a>"
                else
                        p = p m
                s = substr(s,RSTART+RLENGTH)
        }
        print p s
}

END { print "</pre></body></html>" }'
}

tohtml