From 244b30cee961ba127532021cc517f0fd95beda09 Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Wed, 12 Feb 2025 04:46:45 -0700 Subject: [PATCH] feat: JSON parser escape support (#88) * feat: basic parser with escape * fix: tests missing input * feat: working escape * feat: rust witness generator * fix tests --------- Co-authored-by: lonerapier --- circuits/json/extraction.circom | 7 +++- circuits/json/hash_machine.circom | 46 +++++++++++++++-------- circuits/json/language.circom | 5 ++- circuits/json/machine.circom | 18 +++++++-- circuits/json/parser.circom | 22 +++++++++++ circuits/test/full/full.test.ts | 28 +++++++------- circuits/test/json/extraction.test.ts | 54 ++++++++++++++++++++++----- circuits/test/json/index.ts | 2 + circuits/test/json/parser.test.ts | 19 +++++++++- circuits/test/json/values.test.ts | 4 +- witness-generator/src/json/mod.rs | 10 +++-- witness-generator/src/json/parser.rs | 49 +++++++++++++++++------- 12 files changed, 198 insertions(+), 66 deletions(-) diff --git a/circuits/json/extraction.circom b/circuits/json/extraction.circom index 2e58890..cdcec2d 100644 --- a/circuits/json/extraction.circom +++ b/circuits/json/extraction.circom @@ -8,7 +8,7 @@ template JSONExtraction(DATA_BYTES, MAX_STACK_HEIGHT, PUBLIC_IO_LENGTH) { signal input ciphertext_digest; signal input sequence_digest; // todo(sambhav): should sequence digest be 0 for first json circuit? signal input value_digest; - signal input state[MAX_STACK_HEIGHT * 4 + 3]; + signal input state[MAX_STACK_HEIGHT * 4 + 4]; signal input step_in[PUBLIC_IO_LENGTH]; signal output step_out[PUBLIC_IO_LENGTH]; @@ -17,7 +17,7 @@ template JSONExtraction(DATA_BYTES, MAX_STACK_HEIGHT, PUBLIC_IO_LENGTH) { // assertions: // step_in[5] === 0; // HTTP statements matched // TODO: either remove this or send a public io var - signal input_state_digest <== PolynomialDigest(MAX_STACK_HEIGHT * 4 + 3)(state, ciphertext_digest); + signal input_state_digest <== PolynomialDigest(MAX_STACK_HEIGHT * 4 + 4)(state, ciphertext_digest); step_in[8] === input_state_digest; signal sequence_digest_hashed <== Poseidon(1)([sequence_digest]); step_in[9] === sequence_digest_hashed; @@ -62,6 +62,7 @@ template JSONExtraction(DATA_BYTES, MAX_STACK_HEIGHT, PUBLIC_IO_LENGTH) { State[0].monomial <== state[MAX_STACK_HEIGHT*4]; State[0].parsing_string <== state[MAX_STACK_HEIGHT*4 + 1]; State[0].parsing_number <== state[MAX_STACK_HEIGHT*4 + 2]; + State[0].escaped <== state[MAX_STACK_HEIGHT*4 + 3]; } else { State[data_idx] = StateUpdateHasher(MAX_STACK_HEIGHT); State[data_idx].byte <== data[data_idx]; @@ -71,6 +72,7 @@ template JSONExtraction(DATA_BYTES, MAX_STACK_HEIGHT, PUBLIC_IO_LENGTH) { State[data_idx].monomial <== State[data_idx - 1].next_monomial; State[data_idx].parsing_string <== State[data_idx - 1].next_parsing_string; State[data_idx].parsing_number <== State[data_idx - 1].next_parsing_number; + State[data_idx].escaped <== State[data_idx - 1].next_escaped; } // Digest the whole stack and key tree hash @@ -105,6 +107,7 @@ template JSONExtraction(DATA_BYTES, MAX_STACK_HEIGHT, PUBLIC_IO_LENGTH) { // log("State[", data_idx, "].next_monomial =", State[data_idx].next_monomial); // log("State[", data_idx, "].next_parsing_string =", State[data_idx].next_parsing_string); // log("State[", data_idx, "].next_parsing_number =", State[data_idx].next_parsing_number); + // log("State[", data_idx, "].next_escaped =", State[data_idx].next_escaped); // log("++++++++++++++++++++++++++++++++++++++++++++++++"); // log("state_digest[", data_idx,"] = ", state_digest[data_idx]); // log("total_matches = ", total_matches); diff --git a/circuits/json/hash_machine.circom b/circuits/json/hash_machine.circom index 955b58c..d523cb9 100644 --- a/circuits/json/hash_machine.circom +++ b/circuits/json/hash_machine.circom @@ -55,14 +55,17 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { signal input polynomial_input; signal input monomial; signal input tree_hash[MAX_STACK_HEIGHT][2]; + signal input escaped; signal output next_stack[MAX_STACK_HEIGHT][2]; signal output next_parsing_string; signal output next_parsing_number; signal output next_monomial; signal output next_tree_hash[MAX_STACK_HEIGHT][2]; + signal output next_escaped; component Command = Command(); + component Syntax = Syntax(); // log("--------------------------------"); // log("byte: ", byte); @@ -72,25 +75,25 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { // Break down what was read // * read in a start brace `{` * component readStartBrace = IsEqual(); - readStartBrace.in <== [byte, 123]; + readStartBrace.in <== [byte, Syntax.START_BRACE]; // * read in an end brace `}` * component readEndBrace = IsEqual(); - readEndBrace.in <== [byte, 125]; + readEndBrace.in <== [byte, Syntax.END_BRACE]; // * read in a start bracket `[` * component readStartBracket = IsEqual(); - readStartBracket.in <== [byte, 91]; + readStartBracket.in <== [byte, Syntax.START_BRACKET]; // * read in an end bracket `]` * component readEndBracket = IsEqual(); - readEndBracket.in <== [byte, 93]; + readEndBracket.in <== [byte, Syntax.END_BRACKET]; // * read in a colon `:` * component readColon = IsEqual(); - readColon.in <== [byte, 58]; + readColon.in <== [byte, Syntax.COLON]; // * read in a comma `,` * component readComma = IsEqual(); - readComma.in <== [byte, 44]; + readComma.in <== [byte, Syntax.COMMA]; component readDot = IsEqual(); - readDot.in <== [byte, 46]; + readDot.in <== [byte, Syntax.DOT]; // * read in some delimeter * signal readDelimeter <== readStartBrace.out + readEndBrace.out + readStartBracket.out + readEndBracket.out @@ -98,10 +101,14 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { // * read in some number * component readNumber = InRange(8); readNumber.in <== byte; - readNumber.range <== [48, 57]; // This is the range where ASCII digits are + readNumber.range <== [Syntax.NUMBER_START, Syntax.NUMBER_END]; // This is the range where ASCII digits are // * read in a quote `"` * component readQuote = IsEqual(); - readQuote.in <== [byte, 34]; + readQuote.in <== [byte, Syntax.QUOTE]; + // * read in a escape `\` * + component readEscape = IsEqual(); + readEscape.in <== [byte, Syntax.ESCAPE]; + component readOther = IsZero(); readOther.in <== readDelimeter + readNumber.out + readQuote.out + readDot.out; //--------------------------------------------------------------------------------------------// @@ -149,7 +156,7 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { mulMaskAndOut.lhs <== mask.out; mulMaskAndOut.rhs <== [Instruction.out[0], Instruction.out[1], Instruction.out[2] - readOther.out]; - next_parsing_string <== parsing_string + mulMaskAndOut.out[1]; + next_parsing_string <== escaped * (parsing_string - (parsing_string + mulMaskAndOut.out[1])) + (parsing_string + mulMaskAndOut.out[1]); next_parsing_number <== parsing_number + mulMaskAndOut.out[2]; component newStack = RewriteStack(MAX_STACK_HEIGHT); @@ -170,10 +177,17 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { newStack.next_parsing_number <== next_parsing_number; newStack.byte <== byte; newStack.polynomial_input <== polynomial_input; - // * set all the next state of the parser * - next_stack <== newStack.next_stack; - next_tree_hash <== newStack.next_tree_hash; - next_monomial <== newStack.next_monomial; + newStack.escaped <== escaped; + // * set all the next state of the parser using the escaped toggle * + // Toggle escaped if read + next_escaped <== readEscape.out * (1 - escaped); + for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) { + next_stack[i][0] <== next_escaped * (stack[i][0] - newStack.next_stack[i][0]) + newStack.next_stack[i][0]; + next_stack[i][1] <== next_escaped * (stack[i][1] - newStack.next_stack[i][1]) + newStack.next_stack[i][1]; + next_tree_hash[i][0] <== next_escaped * (tree_hash[i][0] - newStack.next_tree_hash[i][0]) + newStack.next_tree_hash[i][0]; + next_tree_hash[i][1] <== next_escaped * (tree_hash[i][1] - newStack.next_tree_hash[i][1]) + newStack.next_tree_hash[i][1]; + } + next_monomial <== next_escaped * (monomial - newStack.next_monomial) + newStack.next_monomial; } /* @@ -300,6 +314,7 @@ template RewriteStack(n) { signal input readColon; signal input readComma; signal input readQuote; + signal input escaped; signal input parsing_number; signal input parsing_string; @@ -399,7 +414,8 @@ template RewriteStack(n) { signal to_clear_zeroth <== end_kv; signal stopped_parsing_number <== IsEqual()([(parsing_number - next_parsing_number), 1]); - signal not_to_clear_first <== IsZero()(end_kv + readQuote * parsing_string + stopped_parsing_number); + signal read_quote_not_escaped <== readQuote * (1 - escaped); + signal not_to_clear_first <== IsZero()(end_kv + read_quote_not_escaped * parsing_string + stopped_parsing_number); signal to_clear_first <== (1 - not_to_clear_first); signal tree_hash_change_value[2] <== [(1 - to_clear_zeroth) * next_state_hash[0], (1 - to_clear_first) * next_state_hash[1]]; diff --git a/circuits/json/language.circom b/circuits/json/language.circom index 0566d74..93d6b94 100644 --- a/circuits/json/language.circom +++ b/circuits/json/language.circom @@ -23,15 +23,16 @@ template Syntax() { // signal output SPACE <== 32; //-Escape-------------------------------------------------------------------------------------// // - ASCII char: `\` - // signal output ESCAPE <== 92; + signal output ESCAPE <== 92; //-Number_Remapping---------------------------------------------------------------------------// signal output NUMBER_START <== 48; signal output NUMBER_END <== 57; + signal output DOT <== 46; } template Command() { - // STATE = [read_write_value, parsing_string, parsing_number] + // STATE = [read_write_value, parsing_string, parsing_number, escape] signal output START_BRACE[3] <== [1, 0, 0 ]; // Command returned by switch if we hit a start brace `{` signal output END_BRACE[3] <== [-1, 0, -1 ]; // Command returned by switch if we hit a end brace `}` signal output START_BRACKET[3] <== [2, 0, 0 ]; // Command returned by switch if we hit a start bracket `[` diff --git a/circuits/json/machine.circom b/circuits/json/machine.circom index 5fa147a..b2ab589 100644 --- a/circuits/json/machine.circom +++ b/circuits/json/machine.circom @@ -50,10 +50,12 @@ template StateUpdate(MAX_STACK_HEIGHT) { signal input stack[MAX_STACK_HEIGHT][2]; signal input parsing_string; signal input parsing_number; + signal input escaped; signal output next_stack[MAX_STACK_HEIGHT][2]; signal output next_parsing_string; signal output next_parsing_number; + signal output next_escaped; component Command = Command(); component Syntax = Syntax(); @@ -88,6 +90,10 @@ template StateUpdate(MAX_STACK_HEIGHT) { // * read in a quote `"` * component readQuote = IsEqual(); readQuote.in <== [byte, Syntax.QUOTE]; + // * read in a escape `\` * + component readEscape = IsEqual(); + readEscape.in <== [byte, Syntax.ESCAPE]; + component readOther = IsZero(); readOther.in <== readDelimeter + readNumber.out + readQuote.out; //--------------------------------------------------------------------------------------------// @@ -145,9 +151,15 @@ template StateUpdate(MAX_STACK_HEIGHT) { newStack.readColon <== readColon.out; newStack.readComma <== readComma.out; // * set all the next state of the parser * - next_stack <== newStack.next_stack; - next_parsing_string <== parsing_string + mulMaskAndOut.out[1]; - next_parsing_number <== parsing_number + mulMaskAndOut.out[2]; + // b * (y - x) + x --> Simple way of doing a switch with boolean b + for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) { + next_stack[i][0] <== readEscape.out * (stack[i][0] - newStack.next_stack[i][0]) + newStack.next_stack[i][0]; + next_stack[i][1] <== readEscape.out * (stack[i][1] - newStack.next_stack[i][1]) + newStack.next_stack[i][1]; + } + next_parsing_string <== readEscape.out * (parsing_string - (parsing_string + mulMaskAndOut.out[1])) + (parsing_string + mulMaskAndOut.out[1]); + next_parsing_number <== readEscape.out * (parsing_number - (parsing_number + mulMaskAndOut.out[2])) + (parsing_number + mulMaskAndOut.out[2]); + // Toggle escaped if read + next_escaped <== readEscape.out * (1 - escaped); //--------------------------------------------------------------------------------------------// } diff --git a/circuits/json/parser.circom b/circuits/json/parser.circom index f8f153a..d3d51df 100644 --- a/circuits/json/parser.circom +++ b/circuits/json/parser.circom @@ -15,6 +15,16 @@ template Parser(DATA_BYTES, MAX_STACK_HEIGHT) { } State[0].parsing_string <== 0; State[0].parsing_number <== 0; + State[0].escaped <== 0; + + // Debugging + for(var i = 0; i { { type: "Object", value: KEY3 }, ]; - let state = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); const padded_http_body = http_body.concat(Array(DATA_BYTES - http_body.length).fill(-1)); const [stack, treeHashes] = jsonTreeHasher(ciphertext_digest, keySequence, MAX_STACK_HEIGHT); @@ -256,7 +256,7 @@ describe("Example NIVC Proof", async () => { ]; const padded_http_body = http_body.concat(Array(DATA_BYTES - http_body.length).fill(-1)); - let state = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); const [stack, treeHashes] = jsonTreeHasher(ciphertext_digest, keySequence, MAX_STACK_HEIGHT); const sequence_digest = compressTreeHash(ciphertext_digest, [stack, treeHashes]); const value_digest = PolynomialDigest(targetValue, ciphertext_digest, BigInt(0)); @@ -372,7 +372,7 @@ describe("Example NIVC Proof", async () => { { type: "Object", value: KEY0 }, ]; - let state = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); const [stack, treeHashes] = jsonTreeHasher(ciphertext_digest, keySequence, MAX_STACK_HEIGHT); const sequence_digest = compressTreeHash(ciphertext_digest, [stack, treeHashes]); @@ -520,7 +520,7 @@ describe("Example NIVC Proof", async () => { // const requestTargetValue = strToBytes("0"); - let requestJsonInitialState = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + let requestJsonInitialState = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); const requestJsonState = [requestJsonInitialState, requestJsonInitialState]; // TODO: request sequence digest is same as response sequence digest @@ -678,7 +678,7 @@ describe("Example NIVC Proof", async () => { const targetValue = strToBytes("ord_67890"); - let initialState = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + let initialState = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); let jsonState1: (bigint | number)[] = [ 1, 1, 1, 1, @@ -700,9 +700,9 @@ describe("Example NIVC Proof", async () => { 0, 0, 0, 0, 0, 0, - 1, 1, 0 + 1, 1, 0, 0 ]; - assert.deepEqual(jsonState1.length, MAX_STACK_HEIGHT * 4 + 3); + assert.deepEqual(jsonState1.length, MAX_STACK_HEIGHT * 4 + 4); let state = [initialState, jsonState1]; const [stack, treeHashes] = jsonTreeHasher(ciphertext_digest, manifest.response.body.json, MAX_STACK_HEIGHT); @@ -997,7 +997,7 @@ describe("512B circuit", function () { // request JSON - const requestJsonInitialState = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + const requestJsonInitialState = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); const requestJsonState1 = [ 1, 1, 1, 1, @@ -1019,7 +1019,7 @@ describe("512B circuit", function () { 0, 0, 0, 0, 0, 0, - 0, 0, 0 + 0, 0, 0, 0 ] const requestJsonState = [requestJsonInitialState, requestJsonState1]; @@ -1061,7 +1061,7 @@ describe("512B circuit", function () { let responseJsonCircuitCount = Math.ceil(responseBody.length / DATA_BYTES); - let initialState = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + let initialState = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); let jsonState1: (bigint | number)[] = [ 1, 1, 1, 1, @@ -1083,7 +1083,7 @@ describe("512B circuit", function () { 0, 0, 0, 0, 0, 0, - 0, 0, 0 + 0, 0, 0, 0 ]; let jsonState2: (bigint | number)[] = [ 1, 1, @@ -1106,7 +1106,7 @@ describe("512B circuit", function () { 0, 0, 0, 0, 0, 0, - 1, 1, 0 + 1, 1, 0, 0 ]; let jsonState3: (bigint | number)[] = [ 1, 1, @@ -1129,9 +1129,9 @@ describe("512B circuit", function () { 0, 0, 0, 0, 0, 0, - 0, 1, 0 + 0, 1, 0, 0 ]; - assert.deepEqual(jsonState1.length, MAX_STACK_HEIGHT * 4 + 3); + assert.deepEqual(jsonState1.length, MAX_STACK_HEIGHT * 4 + 4); let jsonStates = [initialState, jsonState1, jsonState2, jsonState3]; const [stack, treeHashes] = jsonTreeHasher(ciphertext_digest, manifest.response.body.json, MAX_STACK_HEIGHT); diff --git a/circuits/test/json/extraction.test.ts b/circuits/test/json/extraction.test.ts index dcb0b90..9e4c14c 100644 --- a/circuits/test/json/extraction.test.ts +++ b/circuits/test/json/extraction.test.ts @@ -4,10 +4,10 @@ import { assert } from "chai"; const DATA_BYTES = 320; const MAX_STACK_HEIGHT = 6; +const mock_ct_digest = poseidon2([69, 420]); describe("JSON Extraction", () => { let hash_parser: WitnessTester<["step_in", "ciphertext_digest", "data", "sequence_digest", "value_digest", "state"]>; - const mock_ct_digest = poseidon2([69, 420]); before(async () => { hash_parser = await circomkit.WitnessTester(`Parser`, { @@ -35,7 +35,7 @@ describe("JSON Extraction", () => { let data_digest = PolynomialDigest(input, mock_ct_digest, BigInt(0)); - let state = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); let state_digest = PolynomialDigest(state, mock_ct_digest, BigInt(0)); let step_in = [data_digest, 0, 0, 0, 0, 0, 0, 1, state_digest, sequence_digest_hashed, 0]; @@ -96,7 +96,7 @@ describe("JSON Extraction", () => { let data_digest = PolynomialDigest(input, mock_ct_digest, BigInt(0)); let value_digest = PolynomialDigest(targetValue, mock_ct_digest, BigInt(0)); - let state = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); let state_digest = PolynomialDigest(state, mock_ct_digest, BigInt(0)); let step_in = [data_digest, 0, 0, 0, 0, 0, 0, 1, state_digest, sequence_digest_hashed, 0]; @@ -159,7 +159,7 @@ describe("JSON Extraction", () => { const sequence_digest_hashed = poseidon1([sequence_digest]); const data_digest = PolynomialDigest(input, mock_ct_digest, BigInt(0)); const value_digest = PolynomialDigest(targetValue, mock_ct_digest, BigInt(0)); - let state = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); const state_digest = PolynomialDigest(state, mock_ct_digest, BigInt(0)); const step_in = [data_digest, 0, 0, 0, 0, 0, 0, 1, state_digest, sequence_digest_hashed, 0]; @@ -176,6 +176,42 @@ describe("JSON Extraction", () => { assert.deepEqual((json_extraction_step_out.step_out as BigInt[])[9], sequence_digest_hashed); }); + it(`input: string escape`, async () => { + let filename = "string_escape"; + let [input, _keyUnicode, _output] = readJSONInputFile(`${filename}.json`, []); + let input_padded = input.concat(Array(DATA_BYTES - input.length).fill(-1)); + + const KEY0 = strToBytes("a"); + const targetValue = strToBytes("\"b\""); + console.log(targetValue); + + const keySequence: JsonMaskType[] = [ + { type: "Object", value: KEY0 }, + ]; + + const [stack, treeHashes] = jsonTreeHasher(mock_ct_digest, keySequence, 10); + const sequence_digest = compressTreeHash(mock_ct_digest, [stack, treeHashes]); + const sequence_digest_hashed = poseidon1([sequence_digest]); + const data_digest = PolynomialDigest(input, mock_ct_digest, BigInt(0)); + + const value_digest = PolynomialDigest(targetValue, mock_ct_digest, BigInt(0)); + let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); + let state_digest = PolynomialDigest(state, mock_ct_digest, BigInt(0)); + const step_in = [data_digest, 0, 0, 0, 0, 0, 0, 1, state_digest, sequence_digest_hashed, 0]; + + let json_extraction_step_out = await hash_parser.compute({ + data: input_padded, + ciphertext_digest: mock_ct_digest, + sequence_digest, + value_digest, + step_in, + state, + }, ["step_out"]); + assert.deepEqual((json_extraction_step_out.step_out as BigInt[])[0], value_digest); + assert.deepEqual((json_extraction_step_out.step_out as BigInt[])[7], modPow(mock_ct_digest, BigInt(input.length))); + assert.deepEqual((json_extraction_step_out.step_out as BigInt[])[9], sequence_digest_hashed); + }); + it(`input: spotify`, async () => { let filename = "spotify"; let [input, _keyUnicode, _output] = readJSONInputFile(`${filename}.json`, []); @@ -201,7 +237,7 @@ describe("JSON Extraction", () => { const data_digest = PolynomialDigest(input, mock_ct_digest, BigInt(0)); const value_digest = PolynomialDigest(targetValue, mock_ct_digest, BigInt(0)); - let state = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); let state_digest = PolynomialDigest(state, mock_ct_digest, BigInt(0)); const step_in = [data_digest, 0, 0, 0, 0, 0, 0, 1, state_digest, sequence_digest_hashed, 0]; @@ -245,7 +281,7 @@ describe("JSON Extraction", () => { let split_data_digest = PolynomialDigest(input1, mock_ct_digest, BigInt(0)); const value_digest = PolynomialDigest(targetValue, mock_ct_digest, BigInt(0)); - let state = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); let state_digest = PolynomialDigest(state, mock_ct_digest, BigInt(0)); let step_in = [data_digest, 0, 0, 0, 0, 0, 0, 1, state_digest, sequence_digest_hashed, 0]; @@ -275,7 +311,7 @@ describe("JSON Extraction", () => { BigInt("4215832829314030653029106205864494290655121331068956006579751774144816160308"), 0, BigInt("10193689792027765875739665277472584711579103240499433210836208365265070585573"), 51, 0, 0, - 1, 0, 1 + 1, 0, 1, 0 ]; state_digest = PolynomialDigest(state, mock_ct_digest, BigInt(0)); assert.deepEqual(state_digest, json_step_out[8]); @@ -306,7 +342,7 @@ describe("JSON Extraction", () => { BigInt("4215832829314030653029106205864494290655121331068956006579751774144816160308"), 0, BigInt("10193689792027765875739665277472584711579103240499433210836208365265070585573"), 0, 0, 0, - 0, 0, 0 + 0, 0, 0, 0 ]; state_digest = PolynomialDigest(state, mock_ct_digest, BigInt(0)); assert.deepEqual(state_digest, (json_extraction_step_out.step_out as BigInt[])[8]); @@ -353,7 +389,7 @@ describe("JSON Extraction", () => { const data_digest = PolynomialDigest(input, mock_ct_digest, BigInt(0)); const value_digest = PolynomialDigest(targetValue, mock_ct_digest, BigInt(0)); - let state = Array(MAX_STACK_HEIGHT * 4 + 3).fill(0); + let state = Array(MAX_STACK_HEIGHT * 4 + 4).fill(0); let state_digest = PolynomialDigest(state, mock_ct_digest, BigInt(0)); const step_in = [data_digest, 0, 0, 0, 0, 0, 0, 1, state_digest, sequence_digest_hashed, 0]; diff --git a/circuits/test/json/index.ts b/circuits/test/json/index.ts index 3bde941..83a9649 100644 --- a/circuits/test/json/index.ts +++ b/circuits/test/json/index.ts @@ -47,10 +47,12 @@ export const INITIAL_IN = { stack: [[0, 0], [0, 0], [0, 0], [0, 0]], parsing_string: 0, parsing_number: 0, + escaped: 0, }; export const INITIAL_OUT = { next_stack: INITIAL_IN.stack, next_parsing_string: INITIAL_IN.parsing_string, next_parsing_number: INITIAL_IN.parsing_number, + next_escaped: INITIAL_IN.escaped }; \ No newline at end of file diff --git a/circuits/test/json/parser.test.ts b/circuits/test/json/parser.test.ts index 0c525b0..e01fee9 100644 --- a/circuits/test/json/parser.test.ts +++ b/circuits/test/json/parser.test.ts @@ -5,7 +5,7 @@ describe("JSON Parser", () => { it(`array only input`, async () => { let filename = "array_only"; - let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, [0]); + let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, []); circuit = await circomkit.WitnessTester(`Parser`, { file: "json/parser", @@ -20,7 +20,22 @@ describe("JSON Parser", () => { it(`object input`, async () => { let filename = "value_object"; - let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["a"]); + let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, []); + + circuit = await circomkit.WitnessTester(`Parser`, { + file: "json/parser", + template: "Parser", + params: [input.length, 3], + }); + + await circuit.expectPass({ + data: input + }); + }); + + it(`string_escape input`, async () => { + let filename = "string_escape"; + let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, []); circuit = await circomkit.WitnessTester(`Parser`, { file: "json/parser", diff --git a/circuits/test/json/values.test.ts b/circuits/test/json/values.test.ts index f51f43b..7903518 100644 --- a/circuits/test/json/values.test.ts +++ b/circuits/test/json/values.test.ts @@ -3,8 +3,8 @@ import { Delimiters, WhiteSpace, Numbers, Escape, INITIAL_IN, INITIAL_OUT } from describe("StateUpdate :: Values", () => { let circuit: WitnessTester< - ["byte", "pointer", "stack", "parsing_string", "parsing_number"], - ["next_pointer", "next_stack", "next_parsing_string", "next_parsing_number"] + ["byte", "pointer", "stack", "parsing_string", "parsing_number", "escaped"], + ["next_pointer", "next_stack", "next_parsing_string", "next_parsing_number", "next_escaped"] >; before(async () => { circuit = await circomkit.WitnessTester(`GetTopOfStack`, { diff --git a/witness-generator/src/json/mod.rs b/witness-generator/src/json/mod.rs index 3c93a90..be3a651 100644 --- a/witness-generator/src/json/mod.rs +++ b/witness-generator/src/json/mod.rs @@ -24,7 +24,7 @@ pub enum Location { pub enum Status { #[default] None, - ParsingString(String), + ParsingString((String, bool)), ParsingNumber(String), } @@ -43,6 +43,7 @@ pub struct RawJsonMachine { pub tree_hash: [(F, F); MAX_STACK_HEIGHT], pub parsing_string: F, pub parsing_number: F, + pub escaped: F, pub monomial: F, } @@ -55,6 +56,7 @@ impl RawJsonMachine { parsing_string: F::ZERO, parsing_number: F::ZERO, monomial: F::ZERO, + escaped: F::ZERO, } } @@ -112,11 +114,12 @@ impl RawJsonMachine { parsing_number: F::ZERO, parsing_string: F::ZERO, monomial: F::ZERO, + escaped: F::ZERO, }) } - pub fn flatten(&self) -> [F; MAX_STACK_HEIGHT * 4 + 3] { - let mut output = [F::ZERO; MAX_STACK_HEIGHT * 4 + 3]; + pub fn flatten(&self) -> [F; MAX_STACK_HEIGHT * 4 + 4] { + let mut output = [F::ZERO; MAX_STACK_HEIGHT * 4 + 4]; for (idx, pair) in self.stack.iter().enumerate() { output[2 * idx] = pair.0; output[2 * idx + 1] = pair.1; @@ -128,6 +131,7 @@ impl RawJsonMachine { output[MAX_STACK_HEIGHT * 4] = self.monomial; output[MAX_STACK_HEIGHT * 4 + 1] = self.parsing_string; output[MAX_STACK_HEIGHT * 4 + 2] = self.parsing_number; + output[MAX_STACK_HEIGHT * 4 + 3] = self.escaped; output } } diff --git a/witness-generator/src/json/parser.rs b/witness-generator/src/json/parser.rs index 0001374..6705412 100644 --- a/witness-generator/src/json/parser.rs +++ b/witness-generator/src/json/parser.rs @@ -41,9 +41,15 @@ impl From> let mut parsing_number = F::ZERO; let mut parsing_string = F::ZERO; + let mut escaped = F::ZERO; match value.status { Status::ParsingNumber(_) => parsing_number = F::ONE, - Status::ParsingString(_) => parsing_string = F::ONE, + Status::ParsingString((_, escaped_bool)) => { + parsing_string = F::ONE; + if escaped_bool { + escaped = F::ONE; + } + }, Status::None => {}, } Self { @@ -53,6 +59,7 @@ impl From> parsing_number, parsing_string, monomial, + escaped, } } } @@ -81,7 +88,8 @@ impl JsonMachine { fn write_to_label_stack(&mut self) { match self.status.clone() { - Status::ParsingNumber(str) | Status::ParsingString(str) => match self.current_location() { + Status::ParsingNumber(str) | Status::ParsingString((str, _)) => match self.current_location() + { Location::ArrayIndex(_) | Location::ObjectValue => self.label_stack[self.pointer() - 1].1 = str, Location::ObjectKey => { @@ -122,6 +130,7 @@ const COLON: u8 = 58; const COMMA: u8 = 44; const QUOTE: u8 = 34; const NUMBER: [u8; 10] = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57]; +const ESCAPE: u8 = 92; // Tell clippy to eat shit #[allow(clippy::too_many_lines)] @@ -140,8 +149,8 @@ pub fn parse( let mut ctr = 0; for char in bytes { // Update the machine - // println!("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); - // println!("char: {}, ctr: {}", *char as char, ctr); + println!("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); + println!("char: {}, ctr: {}", *char as char, ctr); match *char { START_BRACE => match (machine.clone().status, machine.current_location()) { (Status::None, Location::None | Location::ObjectValue | Location::ArrayIndex(_)) => { @@ -205,8 +214,8 @@ pub fn parse( return Err(WitnessGeneratorError::JsonParser("Comma in invalid position!".to_string())), }, QUOTE => match machine.status { - Status::None => machine.status = Status::ParsingString(String::new()), - Status::ParsingString(_) => { + Status::None => machine.status = Status::ParsingString((String::new(), false)), + Status::ParsingString((_, false)) => { machine.status = Status::None; match machine.current_location() { @@ -216,20 +225,28 @@ pub fn parse( _ => {}, } }, + Status::ParsingString((mut str, true)) => { + str.push(*char as char); + machine.status = Status::ParsingString((str, false)); + }, Status::ParsingNumber(_) => return Err(WitnessGeneratorError::JsonParser( "Quote found while parsing number!".to_string(), )), }, + ESCAPE => + if let Status::ParsingString((str, false)) = machine.status { + machine.status = Status::ParsingString((str, true)); + }, c if NUMBER.contains(&c) => match machine.clone().status { Status::None => machine.status = Status::ParsingNumber(String::from(c as char)), Status::ParsingNumber(mut str) => { str.push(*char as char); machine.status = Status::ParsingNumber(str); }, - Status::ParsingString(mut str) => { + Status::ParsingString((mut str, _)) => { str.push(*char as char); - machine.status = Status::ParsingString(str); + machine.status = Status::ParsingString((str, false)); }, }, _ => match machine.status.clone() { @@ -237,9 +254,9 @@ pub fn parse( machine.status = Status::None; machine.clear_array_index_label(); }, - Status::ParsingString(mut str) => { + Status::ParsingString((mut str, _)) => { str.push(*char as char); - machine.status = Status::ParsingString(str); + machine.status = Status::ParsingString((str, false)); }, Status::None => {}, }, @@ -277,7 +294,10 @@ pub fn parse( // "state[ {ctr:?} ].parsing_number = {:?}", // BigUint::from_bytes_le(&raw_state.parsing_number.to_bytes()) // ); - + // println!( + // "state[ {ctr:?} ].escaped = {:?}", + // BigUint::from_bytes_le(&raw_state.escaped.to_bytes()) + // ); ctr += 1; // dbg!(&RawJsonMachine::from(machine.clone())); } @@ -357,18 +377,19 @@ mod tests { #[case::value_array_object(r#"{ "a" : [ { "b" : [ 1 , 4 ] } , { "c" : "b" } ] }"#)] #[case::value_object(r#"{ "a" : { "d" : "e" , "e" : "c" } , "e" : { "f" : "a" , "e" : "2" } , "g" : { "h" : { "a" : "c" } } , "ab" : "foobar" , "bc" : 42 , "dc" : [ 0 , 1 , "a" ] }"#)] #[case::value_float(r#"{"data":{"redditorInfoByName":{"id":"t2_tazi6mk","karma":{"fromAwardsGiven":0.0,"fromAwardsReceived":0.0,"fromComments":24.0,"fromPosts":1765.0,"total":1789.0}}}}"#)] + #[case::string_escape(r#"{"a": "\"b\""}"#)] fn test_json_parser_valid(#[case] input: &str) { let polynomial_input = create_polynomial_input(); - let states = parse::<5>(input.as_bytes(), polynomial_input).unwrap(); - assert_eq!(states.last().unwrap().location, [Location::None; 5]); + let states = parse::<2>(input.as_bytes(), polynomial_input).unwrap(); + assert_eq!(states.last().unwrap().location, [Location::None; 2]); assert_eq!( states.last().unwrap().label_stack, std::array::from_fn(|_| (String::new(), String::new())) ); let raw_states = - states.into_iter().map(RawJsonMachine::from).collect::>>(); + states.into_iter().map(RawJsonMachine::from).collect::>>(); assert_eq!(raw_states.len(), input.len()); verify_final_state(raw_states.last().unwrap());