Skip to content

feat: jsonl #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ A high-performance SQL INSERT statement parser that processes large SQL files an
- Parallel processing with configurable worker count
- Multiple output formats:
- JSON
- JSONL
- CSV
- Text
- Buffered I/O for optimal performance
Expand All @@ -25,7 +26,7 @@ go build cmd/sqlparser/main.go
## Usage

```bash
sqlparser [-format=txt|csv|json] [-output=filename] [-workers=N] <sqlfile>
sqlparser [-format=txt|csv|json|jsonl] [-output=filename] [-workers=N] <sqlfile>
```

### Arguments
Expand All @@ -34,6 +35,7 @@ sqlparser [-format=txt|csv|json] [-output=filename] [-workers=N] <sqlfile>
- `txt`: Human-readable text format
- `csv`: CSV format with headers
- `json`: JSON format with table structure
- `jsonl`: JSON Lines format (one JSON object per row, each including its table name)
- `-output`: Output file path (default: stdout)
- `-workers`: Number of worker threads (default: 1)
- `<sqlfile>`: Input SQL file containing INSERT statements
Expand Down Expand Up @@ -64,6 +66,11 @@ sqlparser -format=csv -workers=4 -output=output.csv input.sql
sqlparser input.sql
```

4. Process SQL file and output as JSON lines:
```bash
sqlparser -format=jsonl -output=output.jsonl input.sql
```

## Performance Optimization

The parser is optimized for performance through:
Expand Down Expand Up @@ -110,6 +117,12 @@ Row,id,name,email
]
```

### JSONL Format
```json
{"table_name": "users", "row_number": 1, "data": {"id": "1", "name": "John Doe", "email": "john@example.com"}}
{"table_name": "users", "row_number": 2, "data": {"id": "2", "name": "John Doe", "email": "john@example.com"}}
```

## License

This project is licensed under the MIT License - see the LICENSE file for details.
2 changes: 1 addition & 1 deletion cmd/sqlparser/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
)

func main() {
format := flag.String("format", "txt", "Output format (txt, csv, json)")
format := flag.String("format", "txt", "Output format (txt, csv, json, jsonl)")
output := flag.String("output", "", "Output file (if not specified, prints to stdout)")
workers := flag.Int("workers", getWorkerCount(), "Number of worker threads")
flag.Parse()
Expand Down
8 changes: 5 additions & 3 deletions pkg/models/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ func getBatchSize() int {
// OutputFormat is the string identifier of an output format (see the Format* constants).
type OutputFormat string

// Supported output format identifiers: "txt", "csv", "json" and "jsonl".
const (
FormatText OutputFormat = "txt"
FormatCSV OutputFormat = "csv"
FormatJSON OutputFormat = "json"
FormatText OutputFormat = "txt"
FormatCSV OutputFormat = "csv"
FormatJSON OutputFormat = "json"
FormatJSONL OutputFormat = "jsonl"
)

// Row is a single parsed record handed to the output writers.
type Row struct {
// TableName is serialized as "table_name" and omitted from JSON when empty.
TableName string `json:"table_name,omitempty"`
// RowNumber is serialized as "row_number".
RowNumber int `json:"row_number"`
// Data is serialized as "data"; presumably maps column names to their
// values — confirm against the parser.
Data map[string]interface{} `json:"data"`
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ func ProcessSQLFileInBatches(filename string, writer writer.Writer, numWorkers i

// Process rows immediately
for _, row := range result.rows {
if writer.Type() == models.FormatJSONL {
row.TableName = currentTableName
}

rowCount++
row.RowNumber = rowCount
currentBatch = append(currentBatch, row)
Expand Down
4 changes: 4 additions & 0 deletions pkg/writer/csv_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,7 @@ func (w *CSVWriter) Close() error {
w.writer.Flush()
return w.buffer.Flush()
}

// Type reports the output format this writer produces: models.FormatCSV.
func (w *CSVWriter) Type() models.OutputFormat {
return models.FormatCSV
}
4 changes: 4 additions & 0 deletions pkg/writer/json_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,7 @@ func (w *JSONWriter) Close() error {
}
return w.writer.Flush()
}

// Type reports the output format this writer produces: models.FormatJSON.
func (w *JSONWriter) Type() models.OutputFormat {
return models.FormatJSON
}
54 changes: 54 additions & 0 deletions pkg/writer/jsonl_writer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package writer

import (
"bufio"
"encoding/json"

"sqlparser/pkg/models"
)

// JSONLWriter writes rows in JSON Lines form: one JSON object per line,
// each line terminated by a newline.
type JSONLWriter struct {
	// writer is the buffered destination; it is flushed after every
	// non-empty WriteRows call and on Close.
	writer *bufio.Writer
}

// NewJSONLWriter returns a JSONLWriter that writes to output.
// The returned error is always nil.
func NewJSONLWriter(output *bufio.Writer) (*JSONLWriter, error) {
	return &JSONLWriter{writer: output}, nil
}

// WriteTableStart is a no-op and always returns nil; this writer emits
// nothing at the start of a table.
func (w *JSONLWriter) WriteTableStart(tableName string) error {
	return nil
}

// WriteRows encodes each row as a single JSON object on its own line and
// then flushes the underlying buffer. An empty batch writes nothing and
// does not flush.
//
// Every record is terminated by '\n' (not merely separated from the next
// one), so rows from consecutive WriteRows calls never end up on the same
// line and the output ends with a trailing newline.
//
// It returns the first encoding, write, or flush error encountered.
func (w *JSONLWriter) WriteRows(rows []models.Row) error {
	if len(rows) == 0 {
		return nil
	}

	// Encode writes the JSON encoding of its argument followed by a newline,
	// which is exactly the JSON Lines record format.
	enc := json.NewEncoder(w.writer)
	for _, row := range rows {
		if err := enc.Encode(row); err != nil {
			return err
		}
	}
	return w.writer.Flush()
}

// WriteTableEnd is a no-op and always returns nil; this writer emits
// nothing at the end of a table.
func (w *JSONLWriter) WriteTableEnd() error {
	return nil
}

// Close flushes any buffered output. It does not close the destination
// wrapped by the bufio.Writer.
func (w *JSONLWriter) Close() error {
	return w.writer.Flush()
}

// Type reports the output format this writer produces: models.FormatJSONL.
func (w *JSONLWriter) Type() models.OutputFormat {
	return models.FormatJSONL
}
4 changes: 4 additions & 0 deletions pkg/writer/text_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,7 @@ func (w *TextWriter) WriteTableEnd() error {
func (w *TextWriter) Close() error {
return w.writer.Flush()
}

// Type reports the output format this writer produces: models.FormatText.
func (w *TextWriter) Type() models.OutputFormat {
return models.FormatText
}
10 changes: 4 additions & 6 deletions pkg/writer/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,17 @@ type Writer interface {
WriteRows(rows []models.Row) error
WriteTableEnd() error
Close() error
Type() models.OutputFormat
}

func CreateWriter(format models.OutputFormat, output io.Writer) (Writer, error) {

bufferedWriter := bufio.NewWriterSize(output, bufferSize)

switch format {
case models.FormatJSON:
w, err := NewJSONWriter(bufferedWriter)
if err != nil {
return nil, err
}
return w, nil
return NewJSONWriter(bufferedWriter)
case models.FormatJSONL:
return NewJSONLWriter(bufferedWriter)
case models.FormatCSV:
return NewCSVWriter(bufferedWriter), nil
case models.FormatText:
Expand Down