-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathmake-sql
More file actions
executable file
·81 lines (71 loc) · 1.91 KB
/
make-sql
File metadata and controls
executable file
·81 lines (71 loc) · 1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env node
"use strict";
var fs = require('fs');
var byline = require('byline');
function formatValueMySQL(v) {
if (v == null) return 'NULL';
var t = typeof v;
if (t === 'string') return singleQuoteMySQL(v);
return v;
}
function formatValuePostgres(v) {
if (v == null) return 'NULL';
var t = typeof v;
if (t === 'string') return singleQuote(v);
if (Array.isArray(v)) return 'ARRAY[' + v.map(formatValuePostgres) + ']';
return v;
}
function singleQuoteMySQL(str) {
return singleQuote(str.replace(/\\/g, "\\\\"));
}
function singleQuote(str) {
return "'" + str.replace(/'/g, "''") + "'";
}
var outMySQL = fs.createWriteStream('tmp/wikipedia-raw-mysql.sql')
var outPostgres = fs.createWriteStream('tmp/wikipedia-raw-postgres.sql')
var lines = byline(fs.createReadStream('tmp/wikipedia.json', { encoding: 'utf8' }));
var cols = [
'__time',
'sometimeLater',
'channel',
'cityName',
'comment',
'commentLength',
'commentLengthStr',
'countryIsoCode',
'countryName',
'deltaBucket100',
'isAnonymous',
'isMinor',
'isNew',
'isRobot',
'isUnpatrolled',
'metroCode',
'namespace',
'page',
'regionIsoCode',
'regionName',
'user',
'userChars',
'delta',
'added',
'deleted',
'deltaByTen'
];
var postGres = cols;
var mySql = cols.filter(c => c != 'userChars')
var lineNumber = 0;
lines.on('data', function(line) {
var d = JSON.parse(line.toString());
d['__time'] = d['__time'].replace('Z', '');
d['sometimeLater'] = d['sometimeLater'].replace('Z', '');
var prefix = lineNumber % 100 ? ', (' : '; INSERT INTO wikipedia_raw VALUES (';
outMySQL.write(prefix + mySql.map((c) => { return formatValueMySQL(d[c]) }).join(', ') + ')\n', 'utf8');
outPostgres.write(prefix + cols.map((c) => { return formatValuePostgres(d[c]) }).join(', ') + ')\n', 'utf8');
lineNumber++;
})
lines.on('end', function() {
var v = ';\n';
outMySQL.write(v, 'utf8');
outPostgres.write(v, 'utf8');
});