fix page links
[c-standard] / tohtml.sh
#!/bin/sh
#
# tohtml.sh - convert a plain-text document read from stdin into a single
# HTML page written to stdout.
#
# Usage: tohtml.sh < document.txt > document.html
#
# What the pipeline does:
#   1. sed escapes the HTML metacharacters &, < and >.
#   2. awk wraps the text in <html>...<pre>, using the first input line as
#      the page title, and then
#        - turns every entry of the table of contents (the lines between a
#          line starting with "Contents" and the "Index" entry) into a link,
#        - turns section headings into named anchors,
#        - turns "[page N]" markers into page anchors,
#        - links section numbers, "Annex X" references and <name.h> header
#          names found in ordinary text to the matching anchor.
#
# NOTE(review): the heading and contents patterns appear to be tuned to the
# layout of one specific document (numbered clauses, lettered annexes,
# Foreword/Introduction/Bibliography/Index); confirm before reusing elsewhere.

# Byte-wise, locale-independent matching for the bracket ranges used below.
export LC_ALL=C

sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' | awk '
# Identifier of the current line: the first field, or the second field
# when the first one contains "Annex" (so "Annex B ..." yields "B").
function line_id(    id) {
	id = $1
	if (id ~ /Annex/)
		id = $2
	return id
}

# Drop one trailing dot, so that "7." and "7" name the same anchor.
function strip_dot(id) {
	if (id ~ /\.$/)
		return substr(id, 1, length(id) - 1)
	return id
}

# The title, the front matter and the table of contents are consumed here
# with getline; the pattern rules below only see the lines after the
# "Index" entry of the contents.
BEGIN {
	# The first input line is both the page title and the first body line.
	getline
	print "<html><head><title>" $0 "</title></head><body><pre>"
	print

	# Copy everything up to the "Contents" line unchanged.
	while ((getline) > 0) {
		if ($0 ~ /^Contents/)
			break
		print
	}
	print "<a name=\"Contents\" href=\"#Contents\">Contents</a>"

	# Table of contents: link every entry to the anchor of its section.
	while ((getline) > 0) {
		id = line_id()
		if (id ~ /^([1-9A-Z]|Index|Foreword|Introduction|Bibliography)/) {
			# Anchors never carry a trailing dot, so normalise the id
			# before it is stored or used as a link target.
			id = strip_dot(id)
			# Remember the first section that mentions each <name.h>;
			# the text rule links header names to that section.
			if (match($0, /&lt;[a-zA-Z0-9_]*\.h&gt;/)) {
				h = substr($0, RSTART, RLENGTH)
				if (!(h in header))
					header[h] = id
			}
			s = "<a href=\"#" id "\">" $0
			if ($(NF-1) == ".")
				# Entry ends with a dot leader and a page number:
				# it fits on one line.
				print s "</a>"
			else {
				# Entry continues on the next line; keep both lines
				# inside the same link. At end of input there is no
				# continuation, so only close the link.
				print s
				if ((getline) > 0)
					print $0 "</a>"
				else
					print "</a>"
			}
			if (id == "Index")
				break
		} else
			print
	}
}

# Section headings (until the Index heading has been seen): emit a named,
# bold anchor. Candidate lines that fail the stricter test fall through to
# the generic text rule below.
!seenindex && /^ *([1-9A-Z]\.|Annex|Index|Foreword|Introduction|Bibliography)/ {
	id = line_id()
	if (($0 ~ /^    [1-9]\./ || id ~ /^([A-Z]|[1-9A-Z]\.[1-9][0-9.]*|Index|Foreword|Introduction|Bibliography)$/) &&
	    (NF == 1 || $2 ~ /^[A-Z]/)) {
		id = strip_dot(id)
		print "<a name=\"" id "\" href=\"#" id "\"><b>" $0 "</b></a>"
		if (id == "Index")
			seenindex = 1
		next
	}
}

# "[page N]" markers: anchor the page and add a link back to the contents.
/^\[page / {
	# $2 is "N]"; drop the closing bracket.
	p = substr($2, 1, length($2) - 1)
	print "[<a name=\"p" p "\" href=\"#p" p "\">page " p "</a>] (<a href=\"#Contents\">Contents</a>)"
	next
}

# Ordinary text: three passes, each consuming s from the left and building
# the rewritten line in p.
{
	# Pass 1: section references preceded by a space or "(".
	# Inside the index a bare number or letter counts as a reference;
	# elsewhere a dotted number ending in a digit is required.
	s = $0
	p = ""
	if (seenindex)
		r = "[ (][A-Z1-9][0-9.]*"
	else
		r = "[ (][A-Z1-9]\\.[0-9.]*[0-9]"
	while (match(s, r)) {
		# Keep the text up to and including the leading space or "(".
		p = p substr(s, 1, RSTART)
		m = substr(s, RSTART + 1, RLENGTH - 1)
		# Not linked: matches containing a two-digit run starting with
		# 4-9 or a three-digit run starting with 0-3 (presumably values
		# too large to be section numbers - TODO confirm), and matches
		# directly followed by a letter or a hyphen (part of a word).
		if (m ~ /[4-9][0-9]/ || m ~ /[0-3][0-9][0-9]/ || substr(s, RSTART + RLENGTH, 1) ~ /[a-zA-Z-]/)
			p = p m
		else
			p = p "<a href=\"#" m "\">" m "</a>"
		s = substr(s, RSTART + RLENGTH)
	}

	# Pass 2: "Annex X" / "annex X" references link to anchor X.
	s = p s
	p = ""
	while (match(s, /[Aa]nnex [A-Z]/)) {
		p = p substr(s, 1, RSTART - 1)
		m = substr(s, RSTART, RLENGTH)
		# The last character of the match is the annex letter.
		p = p "<a href=\"#" substr(m, RLENGTH, 1) "\">" m "</a>"
		s = substr(s, RSTART + RLENGTH)
	}

	# Pass 3: <name.h> (already HTML-escaped by sed) links to the section
	# recorded for it while reading the contents, if any.
	s = p s
	p = ""
	while (match(s, /&lt;[a-zA-Z0-9_]*\.h&gt;/)) {
		p = p substr(s, 1, RSTART - 1)
		m = substr(s, RSTART, RLENGTH)
		if (m in header)
			p = p "<a href=\"#" header[m] "\">" m "</a>"
		else
			p = p m
		s = substr(s, RSTART + RLENGTH)
	}
	print p s
}

END { print "</pre></body></html>" }'