diff --git a/README.md b/README.md
index f54238a..0da0748 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,7 @@ A high-performance SQL INSERT statement parser that processes large SQL files an
 - Parallel processing with configurable worker count
 - Multiple output formats:
   - JSON
+  - JSONL
   - CSV
   - Text
 - Buffered I/O for optimal performance
@@ -25,7 +26,7 @@ go build cmd/sqlparser/main.go
 ## Usage
 
 ```bash
-sqlparser [-format=txt|csv|json] [-output=filename] [-workers=N]
+sqlparser [-format=txt|csv|json|jsonl] [-output=filename] [-workers=N]
 ```
 
 ### Arguments
@@ -34,6 +35,7 @@ sqlparser [-format=txt|csv|json] [-output=filename] [-workers=N]
   - `txt`: Human-readable text format
   - `csv`: CSV format with headers
   - `json`: JSON format with table structure
+  - `jsonl`: JSON Lines format (one JSON object per row)
 - `-output`: Output file path (default: stdout)
 - `-workers`: Number of worker threads (default: 1)
 - ``: Input SQL file containing INSERT statements
@@ -64,6 +66,11 @@ sqlparser -format=csv -workers=4 -output=output.csv input.sql
 sqlparser input.sql
 ```
 
+4. Process SQL file and output as JSON Lines:
+```bash
+sqlparser -format=jsonl -output=output.jsonl input.sql
+```
+
 ## Performance Optimization
 
 The parser is optimized for performance through:
@@ -110,6 +117,12 @@ Row,id,name,email
 ]
 ```
 
+### JSONL Format
+```json
+{"table_name": "users", "row_number": 1, "data": {"id": "1", "name": "John Doe", "email": "john@example.com"}}
+{"table_name": "users", "row_number": 2, "data": {"id": "2", "name": "John Doe", "email": "john@example.com"}}
+```
+
 ## License
 
 This project is licensed under the MIT License - see the LICENSE file for details.
\ No newline at end of file
diff --git a/cmd/sqlparser/main.go b/cmd/sqlparser/main.go
index 46d45ce..9d35581 100644
--- a/cmd/sqlparser/main.go
+++ b/cmd/sqlparser/main.go
@@ -12,7 +12,7 @@ import (
 )
 
 func main() {
-	format := flag.String("format", "txt", "Output format (txt, csv, json)")
+	format := flag.String("format", "txt", "Output format (txt, csv, json, jsonl)")
 	output := flag.String("output", "", "Output file (if not specified, prints to stdout)")
 	workers := flag.Int("workers", getWorkerCount(), "Number of worker threads")
 	flag.Parse()
diff --git a/pkg/models/types.go b/pkg/models/types.go
index 7896967..cf363c2 100644
--- a/pkg/models/types.go
+++ b/pkg/models/types.go
@@ -21,12 +21,14 @@ func getBatchSize() int {
 type OutputFormat string
 
 const (
-	FormatText OutputFormat = "txt"
-	FormatCSV  OutputFormat = "csv"
-	FormatJSON OutputFormat = "json"
+	FormatText  OutputFormat = "txt"
+	FormatCSV   OutputFormat = "csv"
+	FormatJSON  OutputFormat = "json"
+	FormatJSONL OutputFormat = "jsonl"
 )
 
 type Row struct {
+	TableName string                 `json:"table_name,omitempty"`
 	RowNumber int                    `json:"row_number"`
 	Data      map[string]interface{} `json:"data"`
 }
diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go
index 41d3bf0..d586086 100644
--- a/pkg/parser/parser.go
+++ b/pkg/parser/parser.go
@@ -88,6 +88,10 @@ func ProcessSQLFileInBatches(filename string, writer writer.Writer, numWorkers i
 
 		// Process rows immediately
 		for _, row := range result.rows {
+			if writer.Type() == models.FormatJSONL {
+				row.TableName = currentTableName
+			}
+
 			rowCount++
 			row.RowNumber = rowCount
 			currentBatch = append(currentBatch, row)
diff --git a/pkg/writer/csv_writer.go b/pkg/writer/csv_writer.go
index dc0860e..c6bed7d 100644
--- a/pkg/writer/csv_writer.go
+++ b/pkg/writer/csv_writer.go
@@ -76,3 +76,7 @@ func (w *CSVWriter) Close() error {
 	w.writer.Flush()
 	return w.buffer.Flush()
 }
+
+func (w *CSVWriter) Type() models.OutputFormat {
+	return models.FormatCSV
+}
diff --git a/pkg/writer/json_writer.go b/pkg/writer/json_writer.go
index e300ec0..ee9aa8e 100644
--- a/pkg/writer/json_writer.go
+++ b/pkg/writer/json_writer.go
@@ -85,3 +85,7 @@ func (w *JSONWriter) Close() error {
 	}
 	return w.writer.Flush()
 }
+
+func (w *JSONWriter) Type() models.OutputFormat {
+	return models.FormatJSON
+}
diff --git a/pkg/writer/jsonl_writer.go b/pkg/writer/jsonl_writer.go
new file mode 100644
index 0000000..5a7c994
--- /dev/null
+++ b/pkg/writer/jsonl_writer.go
@@ -0,0 +1,54 @@
+package writer
+
+import (
+	"bufio"
+	"encoding/json"
+
+	"sqlparser/pkg/models"
+)
+
+type JSONLWriter struct {
+	writer *bufio.Writer
+}
+
+func NewJSONLWriter(output *bufio.Writer) (*JSONLWriter, error) {
+	return &JSONLWriter{writer: output}, nil
+}
+
+func (w *JSONLWriter) WriteTableStart(tableName string) error {
+	return nil
+}
+
+func (w *JSONLWriter) WriteRows(rows []models.Row) error {
+	if len(rows) == 0 {
+		return nil
+	}
+
+	for i, row := range rows {
+		if i > 0 {
+			if _, err := w.writer.Write([]byte("\n")); err != nil {
+				return err
+			}
+		}
+		data, err := json.Marshal(row)
+		if err != nil {
+			return err
+		}
+		if _, err = w.writer.Write(data); err != nil {
+			return err
+		}
+	}
+	return w.writer.Flush()
+}
+
+func (w *JSONLWriter) WriteTableEnd() error {
+	return nil
+}
+
+func (w *JSONLWriter) Close() error {
+	return w.writer.Flush()
+}
+
+func (w *JSONLWriter) Type() models.OutputFormat {
+	return models.FormatJSONL
+}
diff --git a/pkg/writer/text_writer.go b/pkg/writer/text_writer.go
index 7d447c9..ba57b6a 100644
--- a/pkg/writer/text_writer.go
+++ b/pkg/writer/text_writer.go
@@ -52,3 +52,7 @@ func (w *TextWriter) WriteTableEnd() error {
 func (w *TextWriter) Close() error {
 	return w.writer.Flush()
 }
+
+func (w *TextWriter) Type() models.OutputFormat {
+	return models.FormatText
+}
diff --git a/pkg/writer/writer.go b/pkg/writer/writer.go
index 4930e95..f2f2b43 100644
--- a/pkg/writer/writer.go
+++ b/pkg/writer/writer.go
@@ -15,19 +15,17 @@ type Writer interface {
 	WriteRows(rows []models.Row) error
 	WriteTableEnd() error
 	Close() error
+	Type() models.OutputFormat
 }
 
 func CreateWriter(format models.OutputFormat, output io.Writer) (Writer, error) {
-	bufferedWriter := bufio.NewWriterSize(output, bufferSize)
 	switch format {
 	case models.FormatJSON:
-		w, err := NewJSONWriter(bufferedWriter)
-		if err != nil {
-			return nil, err
-		}
-		return w, nil
+		return NewJSONWriter(bufferedWriter)
+	case models.FormatJSONL:
+		return NewJSONLWriter(bufferedWriter)
 	case models.FormatCSV:
 		return NewCSVWriter(bufferedWriter), nil
 	case models.FormatText: