From 714fdde81feaea61ff6f48efa007424aca9d417e Mon Sep 17 00:00:00 2001 From: rjp Date: Fri, 14 Jan 2022 11:48:48 +0000 Subject: [PATCH 1/2] Alternate version of `statementsFromJSON` which returns the reader position when the JSON parse finished --- statements.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/statements.go b/statements.go index ed1b469..a525406 100644 --- a/statements.go +++ b/statements.go @@ -398,6 +398,21 @@ func statementsFromJSON(r io.Reader, prefix statement) (statements, error) { return ss, nil } +// statementsFromJSONOffset decodes one JSON value from r and returns its statements +// plus the decoder's input offset after that value, or an error on failure +func statementsFromJSONOffset(r io.Reader, prefix statement) (statements, int64, error) { + var top interface{} + d := json.NewDecoder(r) + d.UseNumber() + err := d.Decode(&top) + if err != nil { + return nil, 0, err + } + ss := make(statements, 0, 32) + ss.fill(prefix, top) + return ss, d.InputOffset(), nil +} + // fill takes a prefix statement and some value and recursively fills // the statement list using that value func (ss *statements) fill(prefix statement, v interface{}) { From c45575ee277a69b5843666a7ffda90dac4c94383 Mon Sep 17 00:00:00 2001 From: rjp Date: Mon, 17 Jan 2022 09:39:59 +0000 Subject: [PATCH 2/2] Adds "decode all" option Adds: `-a`, `-all` flag which means "decode all the objects, pretending it's a JSON stream even if it's not actually." Rationale: `gron` only decodes the first object, `gron -s` requires a "correctly" formatted JSON stream (one object per line), but it's not uncommon to get multiple objects per line with tools that don't support JSON stream formatting. This does require a positionable stream, however, since the JSON decoder can read past the end of an object to be sure its parsed correctly. 
`io.Seeker` doesn't work, unfortunately, because whilst we know where we want to be (`d.InputOffset()`), we don't actually know where we currently are which precludes the use of `io.SeekCurrent` and, bizarrely, it turns out that `io.SeekStart` gets progressively slower as you seek further and further into your (in this case) `bytes.Buffer`. Thus we keep track of where we want to be (`moved`) and create a `bytes.NewReader` for each attempted decode at the correct position. Crufty, definitely, and memory-allocation heavy, probably, but it works and is surprisingly not that bad even on large files. My test 85MB JSON single line input takes ~64s (x86_64), ~43s (arm64) and ~275M to parse into 1024 objects comprising 1GB of output text. Compare to `jq`: ~25s (x86_64), ~11s (arm64) using ~630M giving 350MB of output. --- main.go | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/main.go b/main.go index be6ad56..11f6220 100644 --- a/main.go +++ b/main.go @@ -33,6 +33,7 @@ const ( optMonochrome = 1 << iota optNoSort optJSON + optAllObjects ) // Output colors @@ -95,6 +96,7 @@ func main() { versionFlag bool insecureFlag bool jsonFlag bool + allObjectsFlag bool ) flag.BoolVar(&ungronFlag, "ungron", false, "") @@ -111,6 +113,8 @@ func main() { flag.BoolVar(&insecureFlag, "insecure", false, "") flag.BoolVar(&jsonFlag, "j", false, "") flag.BoolVar(&jsonFlag, "json", false, "") + flag.BoolVar(&allObjectsFlag, "a", false, "") + flag.BoolVar(&allObjectsFlag, "all", false, "") flag.Parse() @@ -165,9 +169,12 @@ func main() { var a actionFn = gron if ungronFlag { a = ungron + } else if allObjectsFlag { + a = gronStreamAll } else if streamFlag { a = gronStream } + exitCode, err := a(rawInput, colorable.NewColorableStdout(), opts) if exitCode != exitOK { @@ -312,6 +319,115 @@ out: } +// gronStreamAll is like the gron action, but it treats the input +// as multiple JSON objects. 
There's a bit of code duplication from the +// gron action, but it'd be fairly messy to combine the two actions +func gronStreamAll(r io.Reader, w io.Writer, opts int) (int, error) { + var err error + errstr := "failed to form statements" + var i int + var buf []byte + var conv func(s statement) string + var top statement + var makePrefix func(index int) statement + var moved int64 + + // In order to read all the objects, we need a positionable stream + // because the JSON decoder reads past the end of a complete JSON + // item in order to complete a parse. Using `Seek()` doesn't work + // that well because whilst we have the position of the end of the + // JSON parse (via `d.InputOffset()`), we don't know the current + // position of the stream (and thus can't use `io.SeekCurrent`) + // meaning we have to `io.SeekStart` every time and that ends up + // getting progressively slower each time. Instead we read the + // whole input into `buf` and decode from `buf[moved:]` each time. + buf, err = io.ReadAll(r) + if err != nil { + goto out + } + + if opts&optMonochrome > 0 { + conv = statementToString + } else { + conv = statementToColorString + } + + // Helper function to make the prefix statements for each line + makePrefix = func(index int) statement { + return statement{ + {"json", typBare}, + {"[", typLBrace}, + {fmt.Sprintf("%d", index), typNumericKey}, + {"]", typRBrace}, + } + } + + // The first line of output needs to establish that the top-level + // thing is actually an array... 
+ top = statement{ + {"json", typBare}, + {"=", typEquals}, + {"[]", typEmptyArray}, + {";", typSemi}, + } + + if opts&optJSON > 0 { + top, err = top.jsonify() + if err != nil { + goto out + } + } + + fmt.Fprintln(w, conv(top)) + + i = 0 + + for { + var offset int64 + var ss statements + + br := bytes.NewReader(buf[moved:]) + + ss, offset, err = statementsFromJSONOffset(br, makePrefix(i)) + i++ + if err != nil && err != io.EOF { + goto out + } + + // Go's maps do not have well-defined ordering, but we want a consistent + // output for a given input, so we must sort the statements + if opts&optNoSort == 0 { + sort.Sort(ss) + } + + for _, s := range ss { + if opts&optJSON > 0 { + s, err = s.jsonify() + if err != nil { + goto out + } + + } + fmt.Fprintln(w, conv(s)) + } + + // io.EOF means no further JSON value was left to decode, so we are done + if err == io.EOF { + return exitOK, nil + } + + // Start the next decode just past the value that was consumed + moved = moved + offset + } + +out: + if err != nil { + return exitFormStatements, fmt.Errorf(errstr+": %s", err) + } + return exitOK, nil + +} + // ungron is the reverse of gron. Given assignment statements as input, // it returns JSON. The only option is optMonochrome func ungron(r io.Reader, w io.Writer, opts int) (int, error) {