create an index, as this heavily speeds up some queries
[libfirm] / scripts / statev_sql.py
1 #! /usr/bin/env python
2
3 import sys
4 import os
5 import re
6 import time
7 import stat
8 import fileinput
9 import tempfile
10 import optparse
11
12 class DummyFilter:
13         def match(self, dummy):
14                 return True
15
16 class EmitBase:
17         def create_table(self, cols, name, type, unique):
18                 create = 'create table if not exists %s (id int %s' % (name, unique)
19
20                 sorted = [None] * len(cols)
21                 for x in cols.iterkeys():
22                         sorted[cols[x]] = x
23                 for x in sorted:
24                         create += (", '%s' %s" % (x, type))
25                 create += ');'
26                 return create
27
28 class EmitMysqlInfile(EmitBase):
29         tmpfile_mode = stat.S_IREAD | stat.S_IROTH | stat.S_IWUSR
30
31         def ex(self, args, tab, fname):
32                 res = os.fork()
33                 if res == 0:
34                         stmt = """load data infile '%s' into table %s fields terminated by ';'""" % (fname, tab)
35                         conn = MySQLdb.connect(**args)
36                         c = conn.cursor()
37                         c.execute(stmt)
38                         conn.commit()
39                         sys.exit(0)
40                 return res
41
42         def __init__(self, options, tables, ctxcols, evcols):
43                 import MySQLdb
44
45                 args = dict()
46                 if options.password:
47                         args['passwd'] = options.password
48                 if not options.host:
49                         options.host = 'localhost'
50                 args['user'] = options.user
51                 args['host'] = options.host
52                 args['db']   = options.database
53
54                 self.conn     = MySQLdb.connect(**args)
55                 self.ctxcols  = ctxcols
56                 self.evcols   = evcols
57                 self.options  = options
58                 self.ctxtab   = tables['ctx']
59                 self.evtab    = tables['ev']
60
61                 params = (tempfile.gettempdir(), os.sep, os.getpid())
62                 self.evfifo  = '%s%sstatev_ev_%d' % params
63                 self.ctxfifo = '%s%sstatev_ctx_%d' % params
64
65                 os.mkfifo(self.evfifo)
66                 os.mkfifo(self.ctxfifo)
67
68                 os.chmod(self.evfifo,  self.tmpfile_mode)
69                 os.chmod(self.ctxfifo, self.tmpfile_mode)
70
71                 c = self.conn.cursor()
72                 c.execute('drop table if exists ' + self.evtab)
73                 c.execute('drop table if exists ' + self.ctxtab)
74                 table_ctx = self.create_table(self.ctxcols, self.ctxtab, 'char(80)', 'unique')
75                 c.execute(table_ctx)
76                 table_ev = self.create_table(self.evcols, self.evtab, 'double default null', '')
77                 c.execute(table_ev)
78                 self.conn.commit()
79
80                 if options.verbose:
81                         print 'go for gold'
82
83                 self.pidev  = self.ex(args, self.evtab, self.evfifo)
84                 self.pidctx = self.ex(args, self.ctxtab, self.ctxfifo)
85
86                 if options.verbose:
87                         print "forked two mysql leechers: %d, %d" % (self.pidev, self.pidctx)
88
89                 self.evfile   = open(self.evfifo, 'w+t')
90                 self.ctxfile  = open(self.ctxfifo, 'w+t')
91
92                 if options.verbose:
93                         print 'fifo:  %s, %o' % (self.evfile.name, os.stat(self.evfile.name).st_mode)
94                         print 'fifo:  %s, %o' % (self.ctxfile.name, os.stat(self.ctxfile.name).st_mode)
95
96         def ev(self, curr_id, evitems):
97                 field = ['\N'] * len(self.evcols)
98                 for key, val in evitems.iteritems():
99                         index = self.evcols[key]
100                         field[index] = val
101                 print >> self.evfile, ('%d;' % curr_id) + ';'.join(field)
102
103         def ctx(self, curr_id, ctxitems):
104                 field = ['\N'] * len(self.ctxcols)
105                 for key, val in ctxitems.iteritems():
106                         index = self.ctxcols[key]
107                         field[index] = val
108                 print >> self.ctxfile, ('%d;' % curr_id) + ';'.join(field)
109
110         def commit(self):
111                 self.evfile.close()
112                 self.ctxfile.close()
113
114                 os.waitpid(self.pidev, 0)
115                 os.waitpid(self.pidctx, 0)
116
117                 os.unlink(self.evfile.name)
118                 os.unlink(self.ctxfile.name)
119
120
121 class EmitSqlite3(EmitBase):
122         def __init__(self, options, tables, ctxcols, evcols):
123                 import sqlite3
124
125                 if options.database == None:
126                         print "Have to specify database (file-)name for sqlite"
127                         sys.exit(1)
128
129                 if os.path.isfile(options.database):
130                         os.unlink(options.database)
131
132                 self.ctxtab = tables['ctx']
133                 self.evtab  = tables['ev']
134                 self.conn = sqlite3.connect(options.database)
135                 table_ctx = self.create_table(ctxcols, self.ctxtab, 'text', 'unique')
136                 self.conn.execute(table_ctx)
137                 self.conn.execute("CREATE INDEX IF NOT EXISTS ctxindex ON ctx(id)")
138                 table_ev = self.create_table(evcols, self.evtab, 'double', '')
139                 self.conn.execute(table_ev)
140                 self.conn.execute("CREATE INDEX IF NOT EXISTS evindex ON ev(id)")
141
142                 n = max(len(ctxcols), len(evcols)) + 1
143                 q = ['?']
144                 self.quests = []
145                 for i in xrange(0, n):
146                         self.quests.append(','.join(q))
147                         q.append('?')
148
149         def ev(self, curr_id, evitems):
150                 keys = ""
151                 first = True
152                 for key in evitems.keys():
153                         if first:
154                                 first = False
155                         else:
156                                 keys += ", "
157                         keys += "'%s'" % (key)
158
159                 stmt = "insert into '%s' (id, %s) values (%s)" % (self.evtab, keys, self.quests[len(evitems)])
160                 self.conn.execute(stmt, (curr_id,) + tuple(evitems.values()))
161
162         def ctx(self, curr_id, ctxitems):
163                 keys = ""
164                 first = True
165                 for key in ctxitems.keys():
166                         if first:
167                                 first = False
168                         else:
169                                 keys += ", "
170                         keys += "'%s'" % (key)
171
172                 stmt = "insert into '%s' (id, %s) values (%s)" % (self.ctxtab, keys, self.quests[len(ctxitems)])
173                 self.conn.execute(stmt, (curr_id,) + tuple(ctxitems.values()))
174
175         def commit(self):
176                 self.conn.commit()
177
178 class Conv:
179         engines = { 'sqlite3': EmitSqlite3, 'mysql': EmitMysqlInfile }
180         def find_heads(self):
181                 n_ev    = 0
182                 ctxind  = 0
183                 evind   = 0
184                 ctxcols = dict()
185                 evcols  = dict()
186
187                 self.valid_keys = set()
188
189                 inp = self.input()
190
191                 for line in inp:
192                         if line[0] == 'P':
193                                 ind = line.index(';', 2)
194                                 key = line[2:ind]
195                                 if not ctxcols.has_key(key):
196                                         ctxcols[key] = ctxind
197                                         ctxind += 1
198
199                         elif line[0] == 'E':
200                                 ind = line.index(';', 2)
201                                 key = line[2:ind]
202                                 if self.filter.match(key):
203                                         self.n_events += 1
204                                         if not evcols.has_key(key):
205                                                 self.valid_keys.add(key)
206                                                 evcols[key] = evind
207                                                 evind += 1
208
209                 return (ctxcols, evcols)
210
211         def input(self):
212                 return fileinput.FileInput(files=self.files, openhook=fileinput.hook_compressed)
213
214         def fill_tables(self):
215                 lineno     = 0
216                 ids        = 0
217                 curr_id    = 0
218                 last_push_curr_id = 0
219                 keystack   = []
220                 idstack    = []
221                 curr_event = 0
222                 last_prec  = -1
223                 evcols     = dict()
224                 ctxcols    = dict()
225
226                 for line in self.input():
227                         lineno += 1
228                         items = line.strip().split(';')
229                         op    = items[0]
230
231                         if op == 'P':
232                                 # flush the current events
233                                 if len(evcols):
234                                         self.emit.ev(last_push_curr_id, evcols)
235                                         evcols.clear()
236
237                                 # push the key
238                                 key   = items[1]
239                                 val   = items[2]
240                                 keystack.append(key)
241                                 curr_id = ids
242                                 last_push_curr_id = curr_id
243                                 ids += 1
244                                 idstack.append(curr_id)
245                                 ctxcols[key] = val
246
247                                 self.emit.ctx(curr_id, ctxcols)
248
249                         elif op == 'O':
250                                 popkey = items[1]
251                                 key = keystack.pop()
252
253                                 if popkey != key:
254                                         print "unmatched pop in line %d, push key %s, pop key: %s" % (lineno, key, popkey)
255
256                                 idstack.pop()
257                                 if len(idstack) > 0:
258                                         if len(evcols) > 0:
259                                                 self.emit.ev(curr_id, evcols)
260                                                 evcols.clear()
261                                         del ctxcols[key]
262                                         curr_id = idstack[-1]
263                                 else:
264                                         curr_id = -1
265
266                         elif op == 'E':
267                                 key = items[1]
268                                 if key in self.valid_keys:
269                                         curr_event += 1
270                                         evcols[key] = items[2]
271
272                                         if self.verbose:
273                                                 prec = curr_event * 10 / self.n_events
274                                                 if prec > last_prec:
275                                                         last_prec = prec
276                                                         print '%10d / %10d' % (curr_event, self.n_events)
277
278         def __init__(self):
279                 parser = optparse.OptionParser('usage: %prog [options]  <event file...>')
280                 parser.add_option("-c", "--clean",    dest="clean",    help="delete tables in advance", action="store_true", default=False)
281                 parser.add_option("-v", "--verbose",  dest="verbose",  help="verbose messages",         action="store_true", default=False)
282                 parser.add_option("-f", "--filter",   dest="filter",   help="regexp to filter event keys", metavar="REGEXP")
283                 parser.add_option("-u", "--user",     dest="user",     help="user",               metavar="USER")
284                 parser.add_option("-H", "--host",     dest="host",     help="host",               metavar="HOST")
285                 parser.add_option("-p", "--password", dest="password", help="password",           metavar="PASSWORD")
286                 parser.add_option("-d", "--db",       dest="database", help="database",           metavar="DB")
287                 parser.add_option("-e", "--engine",   dest="engine",   help="engine",             metavar="ENG", default='sqlite3')
288                 parser.add_option("-P", "--prefix",   dest="prefix",   help="table prefix",       metavar="PREFIX", default='')
289                 (options, args) = parser.parse_args()
290
291                 self.n_events = 0
292                 self.stmts    = dict()
293                 self.verbose  = options.verbose
294
295                 tables = dict()
296                 tables['ctx'] = options.prefix + 'ctx'
297                 tables['ev']  = options.prefix + 'ev'
298
299                 if len(args) < 1:
300                         parser.print_help()
301                         sys.exit(1)
302
303                 self.files  = []
304                 files       = args
305
306                 for file in files:
307                         if not os.path.isfile(file):
308                                 print "cannot find input file %s" % (file, )
309                         else:
310                                 self.files.append(file)
311
312                 if len(self.files) < 1:
313                         print "no input file to process"
314                         sys.exit(3)
315
316                 if options.filter:
317                         self.filter = re.compile(options.filter)
318                 else:
319                         self.filter = DummyFilter()
320
321                 if options.engine in self.engines:
322                         engine = self.engines[options.engine]
323                 else:
324                         print 'engine %s not found' % options.engine
325                         print 'we offer: %s' % self.engines.keys()
326                         sys.exit(0)
327
328                 if options.verbose:
329                         print "determining schema..."
330
331                 (ctxcols, evcols) = self.find_heads()
332                 if options.verbose:
333                         print "context schema:"
334                         print ctxcols
335                         print "event schema:"
336                         print evcols
337                         print "tables:"
338                         print tables
339
340                 self.emit = engine(options, tables, ctxcols, evcols)
341
342                 if options.verbose:
343                         print "filling tables..."
344                 self.fill_tables()
345                 if options.verbose:
346                         print "comitting..."
347                 self.emit.commit()
348
349 if __name__ == "__main__":
350         Conv()