Skip to content

Commit

Permalink
feat: JSON parser escape support (#88)
Browse files Browse the repository at this point in the history
* feat: basic parser with escape

* fix: tests missing input

* feat: working escape

* feat: rust witness generator

* fix tests

---------

Co-authored-by: lonerapier <lonerapier@proton.me>
  • Loading branch information
Autoparallel and lonerapier authored Feb 12, 2025
1 parent a62ebfa commit 244b30c
Show file tree
Hide file tree
Showing 12 changed files with 198 additions and 66 deletions.
7 changes: 5 additions & 2 deletions circuits/json/extraction.circom
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ template JSONExtraction(DATA_BYTES, MAX_STACK_HEIGHT, PUBLIC_IO_LENGTH) {
signal input ciphertext_digest;
signal input sequence_digest; // todo(sambhav): should sequence digest be 0 for first json circuit?
signal input value_digest;
signal input state[MAX_STACK_HEIGHT * 4 + 3];
signal input state[MAX_STACK_HEIGHT * 4 + 4];

signal input step_in[PUBLIC_IO_LENGTH];
signal output step_out[PUBLIC_IO_LENGTH];
Expand All @@ -17,7 +17,7 @@ template JSONExtraction(DATA_BYTES, MAX_STACK_HEIGHT, PUBLIC_IO_LENGTH) {

// assertions:
// step_in[5] === 0; // HTTP statements matched // TODO: either remove this or send a public io var
signal input_state_digest <== PolynomialDigest(MAX_STACK_HEIGHT * 4 + 3)(state, ciphertext_digest);
signal input_state_digest <== PolynomialDigest(MAX_STACK_HEIGHT * 4 + 4)(state, ciphertext_digest);
step_in[8] === input_state_digest;
signal sequence_digest_hashed <== Poseidon(1)([sequence_digest]);
step_in[9] === sequence_digest_hashed;
Expand Down Expand Up @@ -62,6 +62,7 @@ template JSONExtraction(DATA_BYTES, MAX_STACK_HEIGHT, PUBLIC_IO_LENGTH) {
State[0].monomial <== state[MAX_STACK_HEIGHT*4];
State[0].parsing_string <== state[MAX_STACK_HEIGHT*4 + 1];
State[0].parsing_number <== state[MAX_STACK_HEIGHT*4 + 2];
State[0].escaped <== state[MAX_STACK_HEIGHT*4 + 3];
} else {
State[data_idx] = StateUpdateHasher(MAX_STACK_HEIGHT);
State[data_idx].byte <== data[data_idx];
Expand All @@ -71,6 +72,7 @@ template JSONExtraction(DATA_BYTES, MAX_STACK_HEIGHT, PUBLIC_IO_LENGTH) {
State[data_idx].monomial <== State[data_idx - 1].next_monomial;
State[data_idx].parsing_string <== State[data_idx - 1].next_parsing_string;
State[data_idx].parsing_number <== State[data_idx - 1].next_parsing_number;
State[data_idx].escaped <== State[data_idx - 1].next_escaped;
}

// Digest the whole stack and key tree hash
Expand Down Expand Up @@ -105,6 +107,7 @@ template JSONExtraction(DATA_BYTES, MAX_STACK_HEIGHT, PUBLIC_IO_LENGTH) {
// log("State[", data_idx, "].next_monomial =", State[data_idx].next_monomial);
// log("State[", data_idx, "].next_parsing_string =", State[data_idx].next_parsing_string);
// log("State[", data_idx, "].next_parsing_number =", State[data_idx].next_parsing_number);
// log("State[", data_idx, "].next_escaped =", State[data_idx].next_escaped);
// log("++++++++++++++++++++++++++++++++++++++++++++++++");
// log("state_digest[", data_idx,"] = ", state_digest[data_idx]);
// log("total_matches = ", total_matches);
Expand Down
46 changes: 31 additions & 15 deletions circuits/json/hash_machine.circom
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,17 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) {
signal input polynomial_input;
signal input monomial;
signal input tree_hash[MAX_STACK_HEIGHT][2];
signal input escaped;

signal output next_stack[MAX_STACK_HEIGHT][2];
signal output next_parsing_string;
signal output next_parsing_number;
signal output next_monomial;
signal output next_tree_hash[MAX_STACK_HEIGHT][2];
signal output next_escaped;

component Command = Command();
component Syntax = Syntax();

// log("--------------------------------");
// log("byte: ", byte);
Expand All @@ -72,36 +75,40 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) {
// Break down what was read
// * read in a start brace `{` *
component readStartBrace = IsEqual();
readStartBrace.in <== [byte, 123];
readStartBrace.in <== [byte, Syntax.START_BRACE];
// * read in an end brace `}` *
component readEndBrace = IsEqual();
readEndBrace.in <== [byte, 125];
readEndBrace.in <== [byte, Syntax.END_BRACE];
// * read in a start bracket `[` *
component readStartBracket = IsEqual();
readStartBracket.in <== [byte, 91];
readStartBracket.in <== [byte, Syntax.START_BRACKET];
// * read in an end bracket `]` *
component readEndBracket = IsEqual();
readEndBracket.in <== [byte, 93];
readEndBracket.in <== [byte, Syntax.END_BRACKET];
// * read in a colon `:` *
component readColon = IsEqual();
readColon.in <== [byte, 58];
readColon.in <== [byte, Syntax.COLON];
// * read in a comma `,` *
component readComma = IsEqual();
readComma.in <== [byte, 44];
readComma.in <== [byte, Syntax.COMMA];

component readDot = IsEqual();
readDot.in <== [byte, 46];
readDot.in <== [byte, Syntax.DOT];

// * read in some delimiter *
signal readDelimeter <== readStartBrace.out + readEndBrace.out + readStartBracket.out + readEndBracket.out
+ readColon.out + readComma.out;
// * read in some number *
component readNumber = InRange(8);
readNumber.in <== byte;
readNumber.range <== [48, 57]; // This is the range where ASCII digits are
readNumber.range <== [Syntax.NUMBER_START, Syntax.NUMBER_END]; // This is the range where ASCII digits are
// * read in a quote `"` *
component readQuote = IsEqual();
readQuote.in <== [byte, 34];
readQuote.in <== [byte, Syntax.QUOTE];
// * read in an escape `\` *
component readEscape = IsEqual();
readEscape.in <== [byte, Syntax.ESCAPE];

component readOther = IsZero();
readOther.in <== readDelimeter + readNumber.out + readQuote.out + readDot.out;
//--------------------------------------------------------------------------------------------//
Expand Down Expand Up @@ -149,7 +156,7 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) {
mulMaskAndOut.lhs <== mask.out;
mulMaskAndOut.rhs <== [Instruction.out[0], Instruction.out[1], Instruction.out[2] - readOther.out];

next_parsing_string <== parsing_string + mulMaskAndOut.out[1];
next_parsing_string <== escaped * (parsing_string - (parsing_string + mulMaskAndOut.out[1])) + (parsing_string + mulMaskAndOut.out[1]);
next_parsing_number <== parsing_number + mulMaskAndOut.out[2];

component newStack = RewriteStack(MAX_STACK_HEIGHT);
Expand All @@ -170,10 +177,17 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) {
newStack.next_parsing_number <== next_parsing_number;
newStack.byte <== byte;
newStack.polynomial_input <== polynomial_input;
// * set all the next state of the parser *
next_stack <== newStack.next_stack;
next_tree_hash <== newStack.next_tree_hash;
next_monomial <== newStack.next_monomial;
newStack.escaped <== escaped;
// * set all the next state of the parser using the escaped toggle *
// Set escaped only when an escape `\` is read while not already escaped; otherwise clear it
next_escaped <== readEscape.out * (1 - escaped);
for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) {
next_stack[i][0] <== next_escaped * (stack[i][0] - newStack.next_stack[i][0]) + newStack.next_stack[i][0];
next_stack[i][1] <== next_escaped * (stack[i][1] - newStack.next_stack[i][1]) + newStack.next_stack[i][1];
next_tree_hash[i][0] <== next_escaped * (tree_hash[i][0] - newStack.next_tree_hash[i][0]) + newStack.next_tree_hash[i][0];
next_tree_hash[i][1] <== next_escaped * (tree_hash[i][1] - newStack.next_tree_hash[i][1]) + newStack.next_tree_hash[i][1];
}
next_monomial <== next_escaped * (monomial - newStack.next_monomial) + newStack.next_monomial;
}

/*
Expand Down Expand Up @@ -300,6 +314,7 @@ template RewriteStack(n) {
signal input readColon;
signal input readComma;
signal input readQuote;
signal input escaped;

signal input parsing_number;
signal input parsing_string;
Expand Down Expand Up @@ -399,7 +414,8 @@ template RewriteStack(n) {

signal to_clear_zeroth <== end_kv;
signal stopped_parsing_number <== IsEqual()([(parsing_number - next_parsing_number), 1]);
signal not_to_clear_first <== IsZero()(end_kv + readQuote * parsing_string + stopped_parsing_number);
signal read_quote_not_escaped <== readQuote * (1 - escaped);
signal not_to_clear_first <== IsZero()(end_kv + read_quote_not_escaped * parsing_string + stopped_parsing_number);
signal to_clear_first <== (1 - not_to_clear_first);
signal tree_hash_change_value[2] <== [(1 - to_clear_zeroth) * next_state_hash[0], (1 - to_clear_first) * next_state_hash[1]];

Expand Down
5 changes: 3 additions & 2 deletions circuits/json/language.circom
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,16 @@ template Syntax() {
// signal output SPACE <== 32;
//-Escape-------------------------------------------------------------------------------------//
// - ASCII char: `\`
// signal output ESCAPE <== 92;
signal output ESCAPE <== 92;
//-Number_Remapping---------------------------------------------------------------------------//
signal output NUMBER_START <== 48;
signal output NUMBER_END <== 57;

signal output DOT <== 46;
}

template Command() {
// STATE = [read_write_value, parsing_string, parsing_number]
// STATE = [read_write_value, parsing_string, parsing_number, escape]
signal output START_BRACE[3] <== [1, 0, 0 ]; // Command returned by switch if we hit a start brace `{`
signal output END_BRACE[3] <== [-1, 0, -1 ]; // Command returned by switch if we hit a end brace `}`
signal output START_BRACKET[3] <== [2, 0, 0 ]; // Command returned by switch if we hit a start bracket `[`
Expand Down
18 changes: 15 additions & 3 deletions circuits/json/machine.circom
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,12 @@ template StateUpdate(MAX_STACK_HEIGHT) {
signal input stack[MAX_STACK_HEIGHT][2];
signal input parsing_string;
signal input parsing_number;
signal input escaped;

signal output next_stack[MAX_STACK_HEIGHT][2];
signal output next_parsing_string;
signal output next_parsing_number;
signal output next_escaped;

component Command = Command();
component Syntax = Syntax();
Expand Down Expand Up @@ -88,6 +90,10 @@ template StateUpdate(MAX_STACK_HEIGHT) {
// * read in a quote `"` *
component readQuote = IsEqual();
readQuote.in <== [byte, Syntax.QUOTE];
// * read in an escape `\` *
component readEscape = IsEqual();
readEscape.in <== [byte, Syntax.ESCAPE];

component readOther = IsZero();
readOther.in <== readDelimeter + readNumber.out + readQuote.out;
//--------------------------------------------------------------------------------------------//
Expand Down Expand Up @@ -145,9 +151,15 @@ template StateUpdate(MAX_STACK_HEIGHT) {
newStack.readColon <== readColon.out;
newStack.readComma <== readComma.out;
// * set all the next state of the parser *
next_stack <== newStack.next_stack;
next_parsing_string <== parsing_string + mulMaskAndOut.out[1];
next_parsing_number <== parsing_number + mulMaskAndOut.out[2];
// b * (y - x) + x --> Simple way of doing a switch with boolean b: yields y when b = 1 and x when b = 0
for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) {
next_stack[i][0] <== readEscape.out * (stack[i][0] - newStack.next_stack[i][0]) + newStack.next_stack[i][0];
next_stack[i][1] <== readEscape.out * (stack[i][1] - newStack.next_stack[i][1]) + newStack.next_stack[i][1];
}
next_parsing_string <== readEscape.out * (parsing_string - (parsing_string + mulMaskAndOut.out[1])) + (parsing_string + mulMaskAndOut.out[1]);
next_parsing_number <== readEscape.out * (parsing_number - (parsing_number + mulMaskAndOut.out[2])) + (parsing_number + mulMaskAndOut.out[2]);
// Set escaped only when an escape `\` is read while not already escaped; otherwise clear it
next_escaped <== readEscape.out * (1 - escaped);
//--------------------------------------------------------------------------------------------//
}

Expand Down
22 changes: 22 additions & 0 deletions circuits/json/parser.circom
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,41 @@ template Parser(DATA_BYTES, MAX_STACK_HEIGHT) {
}
State[0].parsing_string <== 0;
State[0].parsing_number <== 0;
State[0].escaped <== 0;

// Debugging
for(var i = 0; i<MAX_STACK_HEIGHT; i++) {
log("State[", 0, "].next_stack[", i,"] = [",State[0].next_stack[i][0], "][", State[0].next_stack[i][1],"]" );
}
log("State[", 0, "].next_parsing_string =", State[0].next_parsing_string);
log("State[", 0, "].next_parsing_number =", State[0].next_parsing_number);
log("State[", 0, "].next_escaped =", State[0].next_escaped);
log("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");

for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) {
State[data_idx] = StateUpdate(MAX_STACK_HEIGHT);
State[data_idx].byte <== data[data_idx];
State[data_idx].stack <== State[data_idx - 1].next_stack;
State[data_idx].parsing_string <== State[data_idx - 1].next_parsing_string;
State[data_idx].parsing_number <== State[data_idx - 1].next_parsing_number;
State[data_idx].escaped <== State[data_idx - 1].next_escaped;

// Debugging
for(var i = 0; i<MAX_STACK_HEIGHT; i++) {
log("State[", data_idx, "].next_stack[", i,"] = [",State[data_idx].next_stack[i][0], "][", State[data_idx].next_stack[i][1],"]" );
}
log("State[", data_idx, "].next_parsing_string =", State[data_idx].next_parsing_string);
log("State[", data_idx, "].next_parsing_number =", State[data_idx].next_parsing_number);
log("State[", data_idx, "].next_escaped =", State[data_idx].next_escaped);
log("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
}

// Constrain to have valid JSON
State[DATA_BYTES - 1].next_parsing_string === 0;
State[DATA_BYTES - 1].next_parsing_number === 0;
State[DATA_BYTES - 1].next_escaped === 0;
for(var i = 0; i < MAX_STACK_HEIGHT; i++) {
State[DATA_BYTES - 1].next_stack[i] === [0,0];
}

}
28 changes: 14 additions & 14 deletions circuits/test/full/full.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ describe("Example NIVC Proof", async () => {
{ type: "Object", value: KEY3 },
];

let state = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0);
let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0);

const padded_http_body = http_body.concat(Array(DATA_BYTES - http_body.length).fill(-1));
const [stack, treeHashes] = jsonTreeHasher(ciphertext_digest, keySequence, MAX_STACK_HEIGHT);
Expand Down Expand Up @@ -256,7 +256,7 @@ describe("Example NIVC Proof", async () => {
];

const padded_http_body = http_body.concat(Array(DATA_BYTES - http_body.length).fill(-1));
let state = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0);
let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0);
const [stack, treeHashes] = jsonTreeHasher(ciphertext_digest, keySequence, MAX_STACK_HEIGHT);
const sequence_digest = compressTreeHash(ciphertext_digest, [stack, treeHashes]);
const value_digest = PolynomialDigest(targetValue, ciphertext_digest, BigInt(0));
Expand Down Expand Up @@ -372,7 +372,7 @@ describe("Example NIVC Proof", async () => {
{ type: "Object", value: KEY0 },
];

let state = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0);
let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0);

const [stack, treeHashes] = jsonTreeHasher(ciphertext_digest, keySequence, MAX_STACK_HEIGHT);
const sequence_digest = compressTreeHash(ciphertext_digest, [stack, treeHashes]);
Expand Down Expand Up @@ -520,7 +520,7 @@ describe("Example NIVC Proof", async () => {

// const requestTargetValue = strToBytes("0");

let requestJsonInitialState = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0);
let requestJsonInitialState = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0);
const requestJsonState = [requestJsonInitialState, requestJsonInitialState];

// TODO: request sequence digest is same as response sequence digest
Expand Down Expand Up @@ -678,7 +678,7 @@ describe("Example NIVC Proof", async () => {

const targetValue = strToBytes("ord_67890");

let initialState = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0);
let initialState = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0);
let jsonState1: (bigint | number)[] = [
1, 1,
1, 1,
Expand All @@ -700,9 +700,9 @@ describe("Example NIVC Proof", async () => {
0, 0,
0, 0,
0, 0,
1, 1, 0
1, 1, 0, 0
];
assert.deepEqual(jsonState1.length, MAX_STACK_HEIGHT * 4 + 3);
assert.deepEqual(jsonState1.length, MAX_STACK_HEIGHT * 4 + 4);
let state = [initialState, jsonState1];

const [stack, treeHashes] = jsonTreeHasher(ciphertext_digest, manifest.response.body.json, MAX_STACK_HEIGHT);
Expand Down Expand Up @@ -997,7 +997,7 @@ describe("512B circuit", function () {

// request JSON

const requestJsonInitialState = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0);
const requestJsonInitialState = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0);
const requestJsonState1 = [
1, 1,
1, 1,
Expand All @@ -1019,7 +1019,7 @@ describe("512B circuit", function () {
0, 0,
0, 0,
0, 0,
0, 0, 0
0, 0, 0, 0
]
const requestJsonState = [requestJsonInitialState, requestJsonState1];

Expand Down Expand Up @@ -1061,7 +1061,7 @@ describe("512B circuit", function () {
let responseJsonCircuitCount = Math.ceil(responseBody.length / DATA_BYTES);


let initialState = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0);
let initialState = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0);
let jsonState1: (bigint | number)[] = [
1, 1,
1, 1,
Expand All @@ -1083,7 +1083,7 @@ describe("512B circuit", function () {
0, 0,
0, 0,
0, 0,
0, 0, 0
0, 0, 0, 0
];
let jsonState2: (bigint | number)[] = [
1, 1,
Expand All @@ -1106,7 +1106,7 @@ describe("512B circuit", function () {
0, 0,
0, 0,
0, 0,
1, 1, 0
1, 1, 0, 0
];
let jsonState3: (bigint | number)[] = [
1, 1,
Expand All @@ -1129,9 +1129,9 @@ describe("512B circuit", function () {
0, 0,
0, 0,
0, 0,
0, 1, 0
0, 1, 0, 0
];
assert.deepEqual(jsonState1.length, MAX_STACK_HEIGHT * 4 + 3);
assert.deepEqual(jsonState1.length, MAX_STACK_HEIGHT * 4 + 4);
let jsonStates = [initialState, jsonState1, jsonState2, jsonState3];

const [stack, treeHashes] = jsonTreeHasher(ciphertext_digest, manifest.response.body.json, MAX_STACK_HEIGHT);
Expand Down
Loading

0 comments on commit 244b30c

Please sign in to comment.