projects
/
c-standard
/ blob
commit
grep
author
committer
pickaxe
?
search:
re
0bf85a05a3c524d549f0c71c11702cdb5c4510fd
[c-standard]
/
translit.sh
1
#!/bin/sh

# Remove non-ASCII characters from the output of
# `pdftotext -layout standard.pdf`.
#
# Reads text on stdin and writes the ASCII result to stdout.
# Assumes a UTF-8 locale (the sed patterns below are UTF-8 encoded).
#
# Pipeline:
#   1. sed - turn form feeds into "(newpage)" markers, transliterate known
#            UTF-8 characters to ASCII and undo pdftotext layout artifacts.
#   2. tr  - replace every remaining byte outside "\n".."~" with "?".
#   3. awk - replace each page break with a "[page N]" line.

# A literal form feed: "\f" inside a sed regex is an extension that POSIX
# sed does not define, so the byte itself is passed instead.
ff=$(printf '\f')

sed -e "s/${ff}/(newpage)/g" -e '
# utf8 fixes
# (the first two patterns are the single-character ligatures U+FB01, U+FB02)
s/ﬁ/fi/g
s/ﬂ/fl/g
s/§/!S/g
s/©/(C)/g
s/—/--/g
s/−/-/g
s/∗/*/g
s/ˆ/^/g
s/〈/</g
s/〉/>/g
s/⎡/[^/g
s/⎤/^]/g
s/⎣/[_/g
s/⎦/_]/g
s/⎢/[ /g
s/⎥/ ]/g
s/⎧/{/g
s/⎨/{/g
s/⎩/{/g
s/±/(+-)/g
s/≤/<=/g
s/≥/>=/g
s/≠/!=/g
s/Σ/(Sum)/g
s/√/sqrt:/g
s/π/pi/g
s/∞/(inf)/g
s/ƒ/fl./g
s/∫/(integral)/g
s/Γ/(Gamma)/g
s/×/x/g
s/•/o/g
s/⎯/-/g
s/↑/(uparrow)/g
s/↓/(downarrow)/g
s/↔/(<->)/g
s/→/(->)/g
s/‘/'\''/g
s/’/'\''/g
# pdftotext layout fixes
s/_ _/__/g
# rejoin "6. 5" as "6.5"; the dot is escaped so that it only matches a
# literal period (unescaped it would also rewrite "10 20" into "1.20")
s/\([0-9]\)\. \([0-9]\)/\1.\2/g
' | LC_ALL=C tr -c '\n-~' '?' | awk '
# Lines are printed one record late: "last" holds the line that has been
# read but not printed yet, and "have" says whether "last" is still pending.
BEGIN {
  have = ((getline) > 0)
  last = $0
  side = 0
}
# Page break. The pending line is not printed; a page label is taken from
# it instead, alternating between its last and its first field from one
# page to the next (presumably the page number sits on alternating sides
# of the footer - confirm against the PDF).
/^\(newpage\)/ {
  n = split(last, a)
  if (side)
    p = a[1]
  else
    p = a[n]
  side = !side
  print "[page " p "]"
  # Drop the page-break line and the line after it; the second line after
  # the break becomes the new pending line. At end of input nothing is
  # left pending.
  have = ((getline) > 0)
  if (have)
    have = ((getline) > 0)
  last = $0
  next
}
{
  print last
  last = $0
  have = 1
}
# Flush the pending line so that input which does not end in a page break
# keeps its final line.
END {
  if (have)
    print last
}
'