forked from docyx/pc-part-dataset
-
Notifications
You must be signed in to change notification settings - Fork 0
/
output.ts
59 lines (46 loc) · 1.41 KB
/
output.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import { mkdir, readFile, readdir, writeFile } from 'fs/promises'
import { join } from 'path'
import type { Part } from './types'
/**
 * Renders parts in JSON Lines form: each part becomes one compact JSON
 * document, and documents are separated by a single `\n`.
 *
 * @param parts - Parts to serialize.
 * @returns The joined text, without a trailing newline; an empty string when
 *   `parts` is empty.
 */
export const outputJsonLines = (parts: Part[]) => {
  const lines: string[] = []
  for (const part of parts) {
    lines.push(JSON.stringify(part))
  }
  return lines.join('\n')
}
/**
 * Converts one cell value into its CSV field text.
 *
 * - `null` and `undefined` become an empty field.
 * - Arrays (including nested arrays) are flattened into a single field whose
 *   elements are joined with commas; the field is always wrapped in double
 *   quotes, even when empty.
 * - Any other value is converted with `String()`. The resulting text is
 *   wrapped in double quotes only when it contains a comma, a double quote,
 *   or a line break.
 * - Whenever a field is quoted, embedded double quotes are doubled.
 *
 * @param value - The raw cell value.
 * @returns The text to place between the separators of a CSV row.
 */
const serializeCsvValue = (value: any): string => {
  // Plain text of a value before any CSV quoting is applied.
  const toText = (v: unknown): string => {
    if (v === null || v === undefined) return ''
    if (Array.isArray(v)) return v.map(toText).join(',')
    return String(v)
  }

  // Quote once, at the field level, so embedded quotes are escaped exactly
  // one time (quoting array elements individually would nest quotes).
  const quote = (text: string): string => `"${text.replace(/"/g, '""')}"`

  const text = toText(value)
  if (Array.isArray(value)) {
    return quote(text)
  }

  // Line breaks must be quoted too, otherwise they split the row in two.
  return /[",\r\n]/.test(text) ? quote(text) : text
}
/**
 * Renders parts as CSV text.
 *
 * The header row is the list of own enumerable keys of the first part. Every
 * data row is emitted in that same key order, so a part with missing or
 * reordered keys still lines up with the header (a missing key yields an
 * empty field). Keys that appear only on later parts are not emitted.
 *
 * @param parts - Parts to serialize.
 * @returns The CSV text with every row, header included, terminated by `\n`;
 *   an empty string when `parts` is empty.
 */
export const outputCsv = (parts: Part[]) => {
  const first = parts[0]
  // Without a first part there are no column names to emit.
  if (first === undefined) return ''

  const keys = Object.keys(first)

  const rows = parts.map((part) => {
    // Look cells up by header key instead of relying on each part's own
    // property order matching the first part's.
    const cells = new Map<string, unknown>(Object.entries(part))
    return keys.map((key) => serializeCsvValue(cells.get(key))).join(',')
  })

  return [keys.join(','), ...rows].join('\n') + '\n'
}
/**
 * Script entry point, executed when this module is loaded.
 *
 * Reads every `*.json` file in `<dir>/json` and writes a `.jsonl` rendering
 * to `<dir>/jsonl` and a `.csv` rendering to `<dir>/csv`, keeping the base
 * file name. `<dir>` is the first command-line argument and defaults to
 * `data-staging`.
 */
;(async () => {
  const dirName = process.argv.slice(2)[0] ?? 'data-staging'
  const jsonDir = join(dirName, 'json')
  const csvDir = join(dirName, 'csv')
  const jsonlDir = join(dirName, 'jsonl')

  const files = await readdir(jsonDir)

  // `recursive: true` makes mkdir succeed when the directory already exists,
  // so the script can be re-run against the same output directory.
  await mkdir(csvDir, { recursive: true })
  await mkdir(jsonlDir, { recursive: true })

  for (const file of files) {
    if (!file.endsWith('.json')) continue

    // Strip the trailing extension only; String.prototype.replace would
    // rewrite the first ".json" occurrence anywhere in the name.
    const baseName = file.slice(0, -'.json'.length)

    const raw = await readFile(join(jsonDir, file), 'utf8')
    const parsed: unknown = JSON.parse(raw)
    if (!Array.isArray(parsed)) {
      throw new Error(`Expected a JSON array in ${join(jsonDir, file)}`)
    }
    const parts = parsed as Part[]

    await writeFile(join(jsonlDir, `${baseName}.jsonl`), outputJsonLines(parts))
    await writeFile(join(csvDir, `${baseName}.csv`), outputCsv(parts))
  }
})().catch((error: unknown) => {
  // Report the failure and signal it through the exit code instead of
  // leaving the rejection unhandled.
  console.error(error)
  process.exitCode = 1
})