Skip to content

A different kind of json#198

Merged
katef merged 3 commits into
masterfrom
kate/different-json
Jan 19, 2020
Merged

A different kind of json#198
katef merged 3 commits into
masterfrom
kate/different-json

Conversation

@katef

@katef katef commented Jan 17, 2020

Copy link
Copy Markdown
Owner

Replacement of the libfsm json output with json that I hope is a little more convenient.

Typically it looks like this, with output for human-readable labels:

; bmake -r CC=clang DEBUG=1 && ./build/bin/re -pl json '[abc][^x]'
{
  "statecount": 3,
  "start": 0,
  "end": [ 2 ],
  "edges": [ 
    { "src": 0, "dst": 0, "label": "[^abc]" },
    { "src": 0, "dst": 1, "label": "[abc]" },
    { "src": 1, "dst": 2, "label": "[^x]" },
    { "src": 1, "dst": 0, "label": "x" },
    { "src": 2, "dst": 2, "label": "/./" }
  ]
}

and similarly as an NFA:

; bmake -r CC=clang DEBUG=1 && ./build/bin/re -npl json '[abc][^x]'
{
  "statecount": 9,
  "start": 0,
  "end": [ 1 ],
  "edges": [ 
    { "src": 0, "dst": 2, "label": "\u03B5" },
    { "src": 2, "dst": 2, "label": "/./" },
    { "src": 2, "dst": 4, "label": "[abc]" },
    { "src": 3, "dst": 3, "label": "/./" }, 
    { "src": 3, "dst": 1, "label": "\u03B5" },
    { "src": 4, "dst": 5, "label": "\u03B5" },
    { "src": 5, "dst": 6, "label": "\u03B5" },
    { "src": 6, "dst": 7, "label": "[^x]" },
    { "src": 6, "dst": 8, "label": "x" },
    { "src": 7, "dst": 3, "label": "\u03B5" }
  ]
}

With the .consolidate_edges option disabled, edges are given independently and this is visible with a symbol attribute (as opposed to label, which is intended for human consumption):

; bmake -r CC=clang DEBUG=1 && ./build/bin/re -cbpl json 'a|b*c?e|d+'
{
  "statecount": 5,
  "start": 0, 
  "end": [ 1, 4 ],
  "edges": [ 
    { "src": 0, "dst": 1, "symbol": "a" },
    { "src": 0, "dst": 2, "symbol": "b" },
    { "src": 0, "dst": 3, "symbol": "c" },
    { "src": 0, "dst": 4, "symbol": "d" },
    { "src": 0, "dst": 1, "symbol": "e" },
    { "src": 2, "dst": 2, "symbol": "b" },
    { "src": 2, "dst": 3, "symbol": "c" },
    { "src": 2, "dst": 1, "symbol": "e" },
    { "src": 3, "dst": 1, "symbol": "e" },
    { "src": 4, "dst": 4, "symbol": "d" }
  ]
}

And with numeric output for byte values (intended for machine consumption):

; bmake -r CC=clang DEBUG=1 && ./build/bin/re -Xcbpl json 'abc'
{
  "statecount": 4,
  "start": 0,
  "end": [ 3 ],
  "edges": [
    { "src": 0, "dst": 1, "symbol": 97 },
    { "src": 1, "dst": 2, "symbol": 98 },
    { "src": 2, "dst": 3, "symbol": 99 }
  ]
}

I didn't write a json schema for this, sorry. I should provide one, I know.

I did experiment with using dagre for node placement.
Here's Graphviz's rendering for the above anchored /a|b*c?e|d+/ regexp:

image

And the same FSM in a quick mock-up page, rendered using dagre-D3:

image

I had wanted to use this json to model using dagre's algorithm for node placement,
in combination with dot(1)'s algorithm for edges. Unfortunately Graphviz will only
heed pre-defined coordinates when rendering using neato, which produces straight
lines for edges, which is of course not what I had hoped for:

image

Regardless, I wrote a nodejs script to read in this new json, and to format it out
to .dot format with the coordinates populated:

#!/usr/bin/env nodejs

var dagre = require("dagre");

const fs = require("fs");
// Read all of stdin (file descriptor 0) as UTF-8 text and parse it as JSON.
const data = JSON.parse(fs.readFileSync(0, "utf-8"));

/**
 * Build a graph from a parsed JSON description of an FSM.
 *
 * One node is created per state (ids 0 .. statecount - 1), plus a synthetic
 * "start" node with a single unlabelled edge to the FSM's start state.
 * One edge is created per entry in `json.edges`.
 *
 * @param {object} graphlib - Object exposing a `Graph` constructor
 *     (the caller in this script passes `dagre.graphlib`).
 * @param {object} json - Parsed FSM with `statecount` (number), `start`
 *     (state id) and `edges` (array of `{ src, dst, label }`).
 * @returns {object} The populated graph instance.
 */
function json_read(graphlib, json) {
    const g = new graphlib.Graph({
        directed: true,
        multigraph: true,
        compound: false,
    });

    g.setGraph({ rankdir: "lr", nodesep: 30, edgesep: 30 });

    // Synthetic node used as the source of the arrow into the start state.
    g.setNode("start", { label: "" });

    for (let state = 0; state < json.statecount; state++) {
        g.setNode(state, { label: "" });
    }

    // Each edge is named by its index in json.edges, so that several edges
    // between the same pair of states get distinct names in the multigraph.
    json.edges.forEach(function (entry, index) {
        g.setEdge({ v: entry.src, w: entry.dst, name: index },
            { label: entry.label });
    });

    // Take the start state from the argument rather than from a global, so
    // the function works for any FSM description passed in.
    g.setEdge({ v: "start", w: json.start }, { label: "" });

    return g;
}

// Build the graph from the parsed input, then let dagre compute a layout;
// the resulting node coordinates are read back below via g.node(v).x / .y.
const g = json_read(dagre.graphlib, data);

dagre.layout(g);

// Emit the graph in Graphviz .dot syntax on stdout.
console.log("digraph G {");

console.log("\trankdir = LR;");
console.log("\tnode [ shape = circle ];");
console.log("\tedge [ weight = 2 ];");
console.log("\tnode [ label = \"\", width = 0.3 ];");
console.log("\tlayout = neato;"); // needed for pos="" XXX: but i want to use dot's edges!
// NOTE(review): nodes are emitted with an "S" prefix (e.g. "Sstart"), so
// "start" does not name any emitted node — confirm whether "Sstart" was meant.
console.log("\troot = start;");
console.log("");

console.log("\tSstart [ shape = none, label = \"\" ];");
console.log("");

// mapping from dagre's coordinates to graphviz's
const xscale = 0.03;
const yscale = 0.02;

for (const v of g.nodes()) {
    const n = g.node(v);
    // Node ids are parsed as base-10 integers before being looked up in
    // data.end; the synthetic "start" id parses to NaN and never matches.
    if (data.end.includes(Number.parseInt(v, 10))) {
        console.log(`\tS${v} [ shape = "doublecircle" ];`);
    }
    // The trailing "!" is part of the emitted pos value.
    console.log(`\tS${v} [ pos = "${n.x * xscale},${n.y * yscale}!" ];`);
}
console.log("");

for (const e of g.edges()) {
    const q = g.edge(e);
    // The label is written between angle brackets exactly as stored.
    console.log(`\tS${e.v} -> S${e.w} [ label = <${q.label}> ];`);
}

console.log("}");
console.log("");

which produces output like:

; ~/gh/libfsm-pristine/build/bin/re -pl json 'ab?c|de+x?' | ~/gh/libfsm-pristine/dagre/w2.js
digraph G {
        rankdir = LR;
        node [ shape = circle ];
        edge [ weight = 2 ];
        node [ label = "", width = 0.3 ];
        layout = neato;
        root = start;

        Sstart [ shape = none, label = "" ];

        S0 [ pos = "1.5,3!" ];
        S1 [ pos = "3,0!" ];
        S2 [ pos = "6,1.85!" ];
        S3 [ shape = "doublecircle" ];
        S3 [ pos = "7.5,0.35000000000000003!" ];
        S4 [ pos = "4.5,0.9!" ];
        Sstart [ pos = "0,3!" ];

        S0 -> S0 [ label = <[^ad]> ];
        S0 -> S1 [ label = <a> ];
        S0 -> S2 [ label = <d> ];
        S1 -> S0 [ label = <[^a-d]> ];
        S1 -> S1 [ label = <a> ];
        S1 -> S4 [ label = <b> ];
        S1 -> S3 [ label = <c> ];
        S1 -> S2 [ label = <d> ];
        S2 -> S0 [ label = <[^ade]> ];
        S2 -> S1 [ label = <a> ];
        S2 -> S2 [ label = <d> ];
        S2 -> S3 [ label = <e> ];
        S3 -> S3 [ label = </./> ];
        S4 -> S0 [ label = <[^acd]> ];
        S4 -> S1 [ label = <a> ];
        S4 -> S3 [ label = <c> ];
        S4 -> S2 [ label = <d> ];
        Sstart -> S0 [ label = <> ];
}

katef added 3 commits January 16, 2020 16:53
This aims to please both human and machine consumption; when `.consolidate_edges` is set, human-readable labels are produced. Otherwise, edges have verbatim symbol attributes.
@katef katef merged commit e6bef24 into master Jan 19, 2020
@katef katef deleted the kate/different-json branch January 19, 2020 00:56
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant