newer c1x draft
[c-standard] / translit.sh
#!/bin/sh

# Remove non-ASCII from the output of `pdftotext -layout standard.pdf`.
#
# Reads the extracted text on stdin and writes the result to stdout, e.g.
#
#     pdftotext -layout standard.pdf - | ./translit.sh >standard.txt
#
# The input is expected to be UTF-8.  Three stages run as one pipeline:
#   1. sed - spell out known non-ASCII characters in ASCII and undo two
#            pdftotext layout artifacts;
#   2. tr  - replace every byte still outside the range \n..~ with '?'
#            (so an unknown multibyte character becomes several '?');
#   3. awk - replace each page break, together with the line before it and
#            the line after it, by a single "[page N]" marker.

translit() {
        # These characters are built from their UTF-8 bytes instead of being
        # written literally:
        #  - the form feed, because `\f` inside a sed regular expression is
        #    a GNU extension and not defined by POSIX;
        #  - the fi/fl ligatures (U+FB01, U+FB02) and the angle brackets
        #    U+2329/U+232A, because Unicode normalisation of this file would
        #    turn them into "fi", "fl" and U+3008/U+3009, silently making
        #    the substitutions useless.
        ff=$(printf '\f')
        lig_fi=$(printf '\357\254\201')
        lig_fl=$(printf '\357\254\202')
        langle=$(printf '\342\214\251')
        rangle=$(printf '\342\214\252')

        sed \
                -e "s/${ff}/(newpage)/g" \
                -e "s/${lig_fi}/fi/g" \
                -e "s/${lig_fl}/fl/g" \
                -e "s/${langle}/</g" \
                -e "s/${rangle}/>/g" \
                -e '
# utf8 fixes
s/§/!S/g
s/©/(C)/g
s/—/--/g
s/−/-/g
s/∗/*/g
s/ˆ/^/g
# normalised form of the angle brackets handled above
s/〈/</g
s/〉/>/g
# pieces of tall square brackets: upper corner, lower corner, extension
s/⎡/[^/g
s/⎤/^]/g
s/⎣/[_/g
s/⎦/_]/g
s/⎢/[ /g
s/⎥/ ]/g
# pieces of a tall left curly bracket
s/⎧/{/g
s/⎨/{/g
s/⎩/{/g
s/±/(+-)/g
s/≤/<=/g
s/≥/>=/g
s/≠/!=/g
s/Σ/(Sum)/g
s/√/sqrt:/g
s/π/pi/g
s/∞/(inf)/g
s/ƒ/fl./g
s/∫/(integral)/g
s/Γ/(Gamma)/g
s/×/x/g
s/•/o/g
s/⎯/-/g
s/↑/(uparrow)/g
s/↓/(downarrow)/g
s/↔/(<->)/g
s/→/(->)/g
s/‘/'\''/g
s/’/'\''/g
# pdftotext layout fixes
# double underscores come out with a space in between
s/_ _/__/g
# floats are sometimes broken: "1. 5" is joined back into "1.5"
s/\([0-9]\)\. \([0-9]\)/\1.\2/g
' | LC_ALL=C tr -c '\n-~' '?' | awk '
# Output runs one line behind the input: `last` holds the most recent line
# that has not been printed yet, so that the line in front of a page break
# can be dropped once the break is seen.
BEGIN {
        getline
        last=$0
        # which end of the buffered line holds the page number:
        # 0 = last field, 1 = first field; flipped on every page break
        side=0
}
/^\(newpage\)/ {
        # `last` is the line right before the break; take the page number
        # from its last or first whitespace-separated field
        n=split(last,a)
        if(side)
                p=a[1]
        else
                p=a[n]
        side=!side
        # the marker replaces the buffered line, which is never printed
        print "[page " p "]"
        # drop the line carrying the break and the line after it, then
        # restart the look-behind with the line that follows
        getline
        getline
        last=$0
        next
}
{
        print last
        last=$0
}
# There is no END rule: the line still held in `last` at end of input is
# not printed.
'
}

translit