From 423fc2e04434579f7bc41fcc17dc2780e58a7673 Mon Sep 17 00:00:00 2001 From: Vitkovskii Vladimir Date: Sun, 15 Mar 2026 00:34:57 +0300 Subject: [PATCH 1/5] replace strconv.Atoi with zero-alloc fastPositiveAtoi in Dig --- insane.go | 21 +++++++++++++++++++-- insane_test.go | 30 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/insane.go b/insane.go index afc7f71..18d8256 100644 --- a/insane.go +++ b/insane.go @@ -794,8 +794,8 @@ get: } return nil getArray: - index, err := strconv.Atoi(curField) - if err != nil || index < 0 || index >= len(node.nodes) { + index := fastPositiveAtoi(curField) + if index < 0 || index >= len(node.nodes) { return nil } curDepth++ @@ -2097,6 +2097,23 @@ func shouldEscape(s string) bool { return false } +// fastPositiveAtoi parses a non-negative decimal integer from s without allocating. +// Returns -1 if s is empty, contains any non-digit byte (including a sign), or does not fit in int. +func fastPositiveAtoi(s string) int { + if len(s) == 0 { + return -1 + } + n := 0 + for i := 0; i < len(s); i++ { + c := s[i] - '0' + if c > 9 || n > (int(^uint(0)>>1)-int(c))/10 { // non-digit byte (bytes below '0' wrap past 9), or n*10+c would not fit in int + return -1 + } + n = n*10 + int(c) + } + return n +} + func decodeInt64(s string) int64 { l := len(s) if l == 0 { diff --git a/insane_test.go b/insane_test.go index 6a19a32..12494fd 100644 --- a/insane_test.go +++ b/insane_test.go @@ -1054,3 +1054,33 @@ func TestIndex(t *testing.T) { assert.Equal(t, index, node.getIndex(), "wrong index") } + +func TestFastPositiveAtoi(t *testing.T) { + tests := []struct { + input string + expected int + }{ + {"0", 0}, + {"1", 1}, + {"9", 9}, + {"10", 10}, + {"123", 123}, + {"999999", 999999}, + {"", -1}, + {"-1", -1}, + {"-123", -1}, + {"abc", -1}, + {"12abc", -1}, + {"abc12", -1}, + {" 1", -1}, + {"1 ", -1}, + {"+1", -1}, + {"1.5", -1}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + assert.Equal(t, tt.expected, fastPositiveAtoi(tt.input)) + }) + } +} From 5da89095207546036dce91abf82340500191c9af Mon Sep 17 00:00:00 2001 From: Vitkovskii 
Vladimir Date: Sun, 15 Mar 2026 00:36:42 +0300 Subject: [PATCH 2/5] numbersMap slice to fixed array, include digits to simplify number parsing loop --- insane.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/insane.go b/insane.go index 18d8256..2551d45 100644 --- a/insane.go +++ b/insane.go @@ -71,7 +71,7 @@ var ( }, } - numbersMap = make([]byte, 256) + numbersMap [256]byte // decode errors ErrEmptyJSON = errors.New("json is empty") @@ -98,6 +98,9 @@ var ( ) func init() { + for c := byte('0'); c <= '9'; c++ { + numbersMap[c] = 1 + } numbersMap['.'] = 1 numbersMap['-'] = 1 numbersMap['e'] = 1 @@ -518,7 +521,7 @@ decode: default: o-- t = o - for ; o != l && ((json[o] >= '0' && json[o] <= '9') || numbersMap[json[o]] == 1); o++ { + for ; o != l && numbersMap[json[o]] == 1; o++ { } if t == o { return nil, insaneErr(ErrExpectedValue, json, o) From 3700a9b0cbc1fff2e2083d8050e80b18d0bd527f Mon Sep 17 00:00:00 2001 From: Vitkovskii Vladimir Date: Sun, 15 Mar 2026 00:50:37 +0300 Subject: [PATCH 3/5] sentinel byte in decode buffer, add fuzz tests --- insane.go | 5 ++- insane_test.go | 120 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/insane.go b/insane.go index 2551d45..8b1fc09 100644 --- a/insane.go +++ b/insane.go @@ -213,8 +213,9 @@ func (d *decoder) decode(json string, shouldReset bool) (*Node, error) { o := len(d.buf) d.buf = append(d.buf, json...) 
+ d.buf = append(d.buf, 0) // sentinel byte for safe loop termination json = toString(d.buf) - l := len(json) + l := len(json) - 1 nodePool := d.nodePool nodePoolLen := len(nodePool) @@ -521,7 +522,7 @@ decode: default: o-- t = o - for ; o != l && numbersMap[json[o]] == 1; o++ { + for ; numbersMap[json[o]] == 1; o++ { } if t == o { return nil, insaneErr(ErrExpectedValue, json, o) diff --git a/insane_test.go b/insane_test.go index 12494fd..ba15ce8 100644 --- a/insane_test.go +++ b/insane_test.go @@ -1,6 +1,7 @@ package insaneJSON import ( + "encoding/json" "math/rand" "strconv" "strings" @@ -1084,3 +1085,122 @@ func TestFastPositiveAtoi(t *testing.T) { }) } } + +func FuzzDecodeEncode(f *testing.F) { + f.Add([]byte(`{}`)) + f.Add([]byte(`[]`)) + f.Add([]byte(`null`)) + f.Add([]byte(`true`)) + f.Add([]byte(`false`)) + f.Add([]byte(`0`)) + f.Add([]byte(`-1`)) + f.Add([]byte(`3.14`)) + f.Add([]byte(`1e10`)) + f.Add([]byte(`""`)) + f.Add([]byte(`"hello"`)) + f.Add([]byte(`"hello\"world"`)) + f.Add([]byte(`"hello\\\"world"`)) + f.Add([]byte(`"\u0000"`)) + f.Add([]byte(`"\n\r\t"`)) + f.Add([]byte(`{"a":"b"}`)) + f.Add([]byte(`{"a":1,"b":2,"c":3}`)) + f.Add([]byte(`[1,2,3]`)) + f.Add([]byte(`[{"a":"b"},{"c":"d"}]`)) + f.Add([]byte(`{"a":{"b":{"c":"d"}}}`)) + f.Add([]byte(`{"a":[1,[2,[3]]]}`)) + f.Add([]byte(` {"a" : "b"} `)) + f.Add([]byte(`[null,true,false,1,"s",{},[]]`)) + + f.Fuzz(func(t *testing.T, data []byte) { + root, err := DecodeBytes(data) + if err != nil { + return + } + defer Release(root) + + // encode must not panic + encoded := root.EncodeToString() + + // re-decode encoded result must succeed + root2, err := DecodeString(encoded) + if err != nil { + t.Fatalf("re-decode failed: %v\ninput: %q\nencoded: %q", err, data, encoded) + } + defer Release(root2) + + // double encode must be stable + encoded2 := root2.EncodeToString() + if encoded != encoded2 { + t.Fatalf("encode not stable\nfirst: %q\nsecond: %q", encoded, encoded2) + } + }) +} + +func FuzzDecodeDig(f 
*testing.F) { + f.Add([]byte(`{"a":{"b":[1,2,3]}}`), "a", "b", "1") + f.Add([]byte(`[0,1,2]`), "0", "", "") + f.Add([]byte(`{"x":"y"}`), "x", "", "") + f.Add([]byte(`{"a":{"b":{"c":"d"}}}`), "a", "b", "c") + + f.Fuzz(func(t *testing.T, data []byte, p1, p2, p3 string) { + root, err := DecodeBytes(data) + if err != nil { + return + } + defer Release(root) + + path := make([]string, 0, 3) + for _, p := range []string{p1, p2, p3} { + if p != "" { + path = append(path, p) + } + } + + // Dig must not panic + node := root.Dig(path...) + if node == nil { + return + } + + // type checks must not panic + _ = node.IsObject() + _ = node.IsArray() + _ = node.IsString() + _ = node.IsNumber() + _ = node.IsNull() + _ = node.IsTrue() + _ = node.IsFalse() + + // value extraction must not panic + _ = node.AsString() + _ = node.AsInt() + _ = node.AsFloat() + _ = node.AsBool() + }) +} + +func FuzzDecodeValidJSON(f *testing.F) { + f.Add([]byte(`{"a":"b","c":[1,2,3],"d":true,"e":null}`)) + f.Add([]byte(`[1,"two",3.0,true,null,{"a":"b"}]`)) + + f.Fuzz(func(t *testing.T, data []byte) { + // only fuzz inputs that stdlib considers valid + if !json.Valid(data) { + return + } + + root, err := DecodeBytes(data) + if err != nil { + t.Fatalf("stdlib says valid but decode failed: %v\ninput: %q", err, data) + } + defer Release(root) + + encoded := root.EncodeToString() + + root2, err := DecodeString(encoded) + if err != nil { + t.Fatalf("re-decode failed: %v\nencoded: %q", err, encoded) + } + defer Release(root2) + }) +} From c30b6655c4d8e04dc19c22110612ba09e3899f90 Mon Sep 17 00:00:00 2001 From: Vitkovskii Vladimir Date: Sun, 15 Mar 2026 22:19:09 +0300 Subject: [PATCH 4/5] chunk-based node pool allocation for better cache locality --- insane.go | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/insane.go b/insane.go index 8b1fc09..a600713 100644 --- a/insane.go +++ b/insane.go @@ -151,10 +151,11 @@ type StrictNode struct { } type decoder struct { - buf 
[]byte - root Root - nodePool []*Node - nodeCount int + buf []byte + root Root + nodePool []*Node + nodeChunks [][]Node + nodeCount int } /* @@ -1810,16 +1811,20 @@ func (n *Node) getIndex() int { // ******************** // func (d *decoder) initPool() { - d.nodePool = make([]*Node, StartNodePoolSize, StartNodePoolSize) - for i := 0; i < StartNodePoolSize; i++ { - d.nodePool[i] = &Node{} + chunk := make([]Node, StartNodePoolSize) + d.nodeChunks = [][]Node{chunk} + d.nodePool = make([]*Node, StartNodePoolSize) + for i := range chunk { + d.nodePool[i] = &chunk[i] } } func (d *decoder) expandPool() []*Node { - c := cap(d.nodePool) - for i := 0; i < c; i++ { - d.nodePool = append(d.nodePool, &Node{}) + c := len(d.nodePool) + chunk := make([]Node, c) + d.nodeChunks = append(d.nodeChunks, chunk) + for i := range chunk { + d.nodePool = append(d.nodePool, &chunk[i]) } return d.nodePool From 6da7d49f40f0a506e506fa48d05a491e6d5b81e3 Mon Sep 17 00:00:00 2001 From: Vitkovskii Vladimir Date: Fri, 20 Mar 2026 23:46:54 +0300 Subject: [PATCH 5/5] add README with examples and API reference, update gitignore --- .gitignore | 3 +- README.md | 383 ++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 320 insertions(+), 66 deletions(-) diff --git a/.gitignore b/.gitignore index 720907b..617724e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ cpu.out -insane-json.test \ No newline at end of file +*.prof +*.test \ No newline at end of file diff --git a/README.md b/README.md index 51d38ea..cf7e245 100644 --- a/README.md +++ b/README.md @@ -1,93 +1,346 @@ # Insane JSON -Lighting fast and simple JSON decode/encode library for GO -## Key features -To be filled +Fast, zero-allocation JSON library for Go. Decode, navigate, mutate, and encode JSON without unmarshalling into Go structs. Designed for high-throughput pipelines where performance matters. 
+ +## Installation + +```bash +go get github.com/ozontech/insane-json +``` + +## Quick Start -## Usage ```go - // ==== DECODE API ==== - root, err = insaneJSON.DecodeString(jsonString) // from string - root, err = insaneJSON.DecodeBytes(jsonBytes) // from byte slice - defer insaneJSON.Release(root) // place root back to pool +root, err := insaneJSON.DecodeString(`{"name":"John","age":30}`) +if err != nil { + panic(err) +} +defer insaneJSON.Release(root) - // ==== GET API ==== - code = root.Dig("response", "code").AsInt() // int from objects - body = root.Dig("response", "body").AsString() // string from objects +name := root.Dig("name").AsString() // "John" +age := root.Dig("age").AsInt() // 30 - keys = []string{"items", "3", "name"} - thirdItemName = root.Dig(keys...).AsString() // string from objects and array +root.Dig("age").MutateToInt(31) +root.AddField("active").MutateToBool(true) - // ==== CHECK API ==== - isObject = root.Dig("response").IsObject() // is value object? - isInt = root.Dig("response", "code").IsInt() // is value null? - isArray = root.Dig("items").IsArray() // is value array? 
+output := root.Encode(nil) // []byte: {"name":"John","age":31,"active":true} +``` - // ==== DELETE API ==== - root.Dig("response", "code").Suicide() // delete object field - root.Dig("items", "3").Suicide() // delete array element - anyDugNode.Suicide() // delete any previously dug node +## Examples - // ==== MODIFY API ==== - root.Dig("response", "code").MutateToString("OK") // convert to string - root.Dig("items", "3").MutateToObject() // convert to empty object +### Extracting fields from API response - item = `{"name":"book","weight":1000}` - err = root.Dig("items", "3").MutateToJSON(item) // convert to parsed JSON +```go +root, err := insaneJSON.DecodeBytes(responseBody) +if err != nil { + return err +} +defer insaneJSON.Release(root) - // ==== OBJECT API ==== - response = root.Dig("response") // get object - fields = response.AsFields() // get object fields +status := root.Dig("response", "status").AsString() +code := root.Dig("response", "code").AsInt() +items := root.Dig("response", "data", "items") - for _, field = range(fields) { - fmt.Println(field.AsField()) // print all object fields +if items.IsArray() { + for _, item := range items.AsArray() { + id := item.Dig("id").AsInt() + name := item.Dig("name").AsString() + fmt.Printf("id=%d name=%s\n", id, name) } +} +``` - for _, field = range(fields) { - response.Dig(field.AsField()).Suicide() // remove all fields - } +### Transforming JSON logs - for _, field = range(fields) { - field.Suicide() // simpler way to remove all fields - } - - header="Content-Encoding: gzip" - response.AddField("header").MutateToString(header) // add new field and set value +```go +root, err := insaneJSON.DecodeBytes(logLine) +if err != nil { + return err +} +defer insaneJSON.Release(root) + +// add tracing info +root.AddField("trace_id").MutateToString(traceID) +root.AddField("processed_at").MutateToString(time.Now().Format(time.RFC3339)) + +// remove sensitive data +root.Dig("request", "headers", "Authorization").Suicide() 
+root.Dig("request", "body", "password").Suicide() - // ==== ARRAY API ==== - items = root.Dig("items") // get array - elements = items.AsArray() // get array elements +// rename field +root.DigField("level").MutateToField("log_level") - for _, element = range(elements) { - fmt.Println(element.AsString()) // print all array elements +output = root.Encode(output[:0]) +``` + +### Filtering array elements + +```go +root, err := insaneJSON.DecodeString(`{"users":[{"name":"Alice","active":true},{"name":"Bob","active":false},{"name":"Carol","active":true}]}`) +if err != nil { + return err +} +defer insaneJSON.Release(root) + +users := root.Dig("users") +for _, user := range users.AsArray() { + if !user.Dig("active").AsBool() { + user.Suicide() } +} + +fmt.Println(root.EncodeToString()) +// {"users":[{"name":"Alice","active":true},{"name":"Carol","active":true}]} +``` + +### High-throughput processing with Root reuse + +```go +root := insaneJSON.Spawn() +defer insaneJSON.Release(root) + +buf := make([]byte, 0, 4096) - for _, element = range(elements) { - element.Suicide() // remove all elements +scanner := bufio.NewScanner(file) +for scanner.Scan() { + if err := root.DecodeBytes(scanner.Bytes()); err != nil { + continue } - item = `{"name":"book","weight":1000}` - err = items.AddElement().MutateToJSON(item) // add new element and set value + root.AddField("source").MutateToString("pipeline-v2") + + buf = root.Encode(buf[:0]) + writer.Write(buf) +} +``` + +### Working with nested JSON + +```go +root, err := insaneJSON.DecodeString(`{"a":{"b":{"c":"deep"}}}`) +if err != nil { + return err +} +defer insaneJSON.Release(root) + +// Dig traverses nested objects +value := root.Dig("a", "b", "c").AsString() // "deep" + +// array elements accessed by string index +root2, _ := insaneJSON.DecodeString(`{"items":["zero","one","two"]}`) +defer insaneJSON.Release(root2) + +second := root2.Dig("items", "1").AsString() // "one" +``` + +### Strict mode with error handling + +```go +root, 
err := insaneJSON.DecodeString(`{"count":"not a number"}`) +if err != nil { + return err +} +defer insaneJSON.Release(root) + +node, err := root.DigStrict("count") +if err != nil { + return err // insaneJSON.ErrNotFound +} + +count, err := node.AsInt() +if err != nil { + return err // insaneJSON.ErrNotNumber +} +``` + +### Merging objects + +```go +root, _ := insaneJSON.DecodeString(`{"a":"1","b":"2"}`) +defer insaneJSON.Release(root) + +patch, _ := root.DecodeStringAdditional(`{"b":"updated","c":"3"}`) + +root.MergeWith(patch) +fmt.Println(root.EncodeToString()) +// {"a":"1","b":"updated","c":"3"} +``` + +## API Overview - // ==== ENCODE API ==== - To be filled +### Decode - // ==== STRICT API ==== - items = root.Dig("items").InStrictMode() // convert value to strict mode - items, err = root.DigStrict("items") // or get strict value directly - - o, err = items.AsObject() // now value has api with error handling - name, err = items.Dig("5").Dig("name").AsInt // err won't be nil since name is a string +| Function | Description | +|---|---| +| `DecodeString(json) (*Root, error)` | Decode JSON string, returns Root from pool | +| `DecodeBytes(json) (*Root, error)` | Decode JSON byte slice | +| `Spawn() *Root` | Get an empty Root from pool | +| `Release(root)` | Return Root to pool | +| `root.DecodeString(json) error` | Reuse Root to decode another JSON | +| `root.DecodeBytes(json) error` | Reuse Root to decode another JSON | +| `root.DecodeStringAdditional(json) (*Node, error)` | Decode JSON using Root's node pool without clearing | - // ==== POOL API ==== - root, err = insaneJSON.DecodeString(json) // get a root from the pool and place decoded json into it - emptyRoot = insaneJSON.Spawn() // get an empty root from the pool +### Navigate - root.DecodeString(emptyRoot, anotherJson) // reuse a root to decode another JSONs +| Function | Description | +|---|---| +| `node.Dig(path...) *Node` | Navigate to nested value. Returns nil if not found | +| `node.DigStrict(path...) 
(*StrictNode, error)` | Same as Dig but returns error if not found | +| `node.AsFields() []*Node` | Get object field nodes | +| `node.AsArray() []*Node` | Get array element nodes | +| `node.AsFieldValue() *Node` | Get value node from field node | +| `node.DigField(path...) *Node` | Get field node (not value) at path | - insaneJSON.Release(root) // place roots back to the pool - insaneJSON.Release(emptyRoot) +### Read Values + +| Function | Description | +|---|---| +| `node.AsString() string` | Get string value | +| `node.AsInt() int` | Get integer value | +| `node.AsInt64() int64` | Get int64 value | +| `node.AsUint64() uint64` | Get uint64 value | +| `node.AsFloat() float64` | Get float64 value | +| `node.AsBool() bool` | Get bool value | +| `node.AsBytes() []byte` | Get value as byte slice | +| `node.AsEscapedString() string` | Get JSON-escaped string value | + +### Type Checks + +| Function | Description | +|---|---| +| `node.IsObject() bool` | Is value an object? | +| `node.IsArray() bool` | Is value an array? | +| `node.IsString() bool` | Is value a string? | +| `node.IsNumber() bool` | Is value a number? | +| `node.IsTrue() bool` | Is value true? | +| `node.IsFalse() bool` | Is value false? | +| `node.IsNull() bool` | Is value null? | +| `node.IsNil() bool` | Is node nil? 
| + +### Modify + +| Function | Description | +|---|---| +| `node.MutateToString(v)` | Set value to string | +| `node.MutateToInt(v)` | Set value to int | +| `node.MutateToFloat(v)` | Set value to float64 | +| `node.MutateToBool(v)` | Set value to bool | +| `node.MutateToNull()` | Set value to null | +| `node.MutateToObject()` | Set value to empty object | +| `node.MutateToArray()` | Set value to empty array | +| `node.MutateToJSON(root, json)` | Set value to parsed JSON | +| `node.MutateToField(name)` | Rename object field | +| `node.MutateToNode(other)` | Copy another node's value | +| `node.Suicide()` | Remove node from parent | +| `node.AddField(name) *Node` | Add field to object, returns value node | +| `node.AddElement() *Node` | Append element to array | +| `node.InsertElement(pos) *Node` | Insert element at position | +| `node.MergeWith(other)` | Merge other object's fields into this one | + +### Encode + +| Function | Description | +|---|---| +| `node.Encode(buf) []byte` | Encode to byte slice, reusing buf | +| `node.EncodeToByte() []byte` | Encode to new byte slice | +| `node.EncodeToString() string` | Encode to string | + +## Important Notes + +### Pool and Lifecycle + +Decoded nodes live inside a pool managed by the Root. After calling `Release(root)`, the Root and all its nodes are returned to the pool and **must not be used**. Accessing nodes after Release leads to undefined behavior. + +```go +root, _ := insaneJSON.DecodeString(`{"a":"b"}`) +node := root.Dig("a") + +insaneJSON.Release(root) + +// BUG: node belongs to the released root, this is undefined behavior +fmt.Println(node.AsString()) ``` -## Benchmarks -To be filled \ No newline at end of file +Always use `defer insaneJSON.Release(root)` right after decode. + +### Thread Safety + +The top-level functions `DecodeString`, `DecodeBytes`, and `Spawn` are safe to call from multiple goroutines — they use `sync.Pool` internally. + +However, a specific Root and its Nodes are **not thread-safe**. 
Do not share a Root between goroutines without synchronization. The typical pattern is one Root per goroutine: + +```go +// correct: each goroutine gets its own Root +for _, data := range items { + go func(d []byte) { + root, err := insaneJSON.DecodeBytes(d) + if err != nil { + return + } + defer insaneJSON.Release(root) + // work with root... + }(data) +} +``` + +### Nil-safe Navigation + +`Dig` on a nil node returns nil without panicking. This allows safe chaining: + +```go +// even if "a" doesn't exist, this won't panic — returns 0 +value := root.Dig("a", "b", "c").AsInt() +``` + +`As*` methods on nil nodes return zero values (`""`, `0`, `false`). + +Use `DigStrict` when you need to distinguish "field not found" from "field is zero value": + +```go +node, err := root.DigStrict("user", "email") +if err != nil { + return err // field doesn't exist +} +email, err := node.AsString() +if err != nil { + return err // field exists but is not a string +} +``` + +### Memory Management + +For best performance, reuse Root objects instead of decoding into new ones: + +```go +root := insaneJSON.Spawn() +defer insaneJSON.Release(root) + +for _, msg := range messages { + root.DecodeBytes(msg) // reuses internal buffers + process(root) +} +``` + +Use `root.ReleaseMem()` after processing an unusually large JSON to free internal buffers: + +```go +root.DecodeBytes(hugeJSON) +process(root) +root.ReleaseMem() // release internal buffers to GC +``` + +## Configuration + +| Variable | Default | Description | +|---|---|---| +| `insaneJSON.StartNodePoolSize` | 128 | Initial number of pre-allocated nodes per Root | +| `insaneJSON.MapUseThreshold` | 16 | Object field count above which Dig builds a hash map for O(1) lookup | +| `insaneJSON.DisableBeautifulErrors` | false | Set to true to skip formatting decode error messages for better performance | + +```go +func init() { + insaneJSON.StartNodePoolSize = 256 + insaneJSON.MapUseThreshold = 32 + insaneJSON.DisableBeautifulErrors = true +} +```