-
Notifications
You must be signed in to change notification settings - Fork 395
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor(gen-schema-view): extract config parsing to their own modules and update gemini model import
- Loading branch information
Showing
6 changed files
with
432 additions
and
227 deletions.
There are no files selected for viewing
175 changes: 175 additions & 0 deletions
175
firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/index.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
import { parseConfig } from "../../../src/config"; | ||
import { promptInquirer } from "../../../src/config/interactive"; | ||
import { | ||
parseProgram, | ||
validateNonInteractiveParams, | ||
} from "../../../src/config/non-interactive"; | ||
import { readSchemas } from "../../../src/schema-loader-utils"; | ||
|
||
// Replace every module parseConfig calls into with Jest mock functions so the
// tests can script their return values and inspect how they were called.
jest.mock("../../../src/config/interactive", () => {
  return { promptInquirer: jest.fn() };
});

jest.mock("../../../src/config/non-interactive", () => {
  return {
    parseProgram: jest.fn(),
    validateNonInteractiveParams: jest.fn(),
  };
});

jest.mock("../../../src/schema-loader-utils", () => {
  return { readSchemas: jest.fn() };
});
|
||
// Spy on process.exit and make it throw instead of terminating the test run;
// the thrown message carries the exit code so tests can assert on it.
const mockExit = jest
  .spyOn(process, "exit")
  .mockImplementation((exitCode) => {
    const message = `Process exited with code ${exitCode}`;
    throw new Error(message);
  });
|
||
/**
 * Unit tests for parseConfig.
 *
 * parseProgram, validateNonInteractiveParams, promptInquirer and readSchemas
 * are Jest mocks, so each test scripts their return values and then checks
 * both the calls parseConfig made and the config object it returned.
 */
describe("parseConfig", () => {
  beforeEach(() => {
    // Reset recorded calls and scripted return values between tests.
    jest.clearAllMocks();
  });

  describe("Non-interactive mode", () => {
    it("should return CLI config from command line arguments", async () => {
      // Setup mocks for non-interactive mode
      const mockProgram = {
        nonInteractive: true,
        project: "test-project",
        bigQueryProject: "test-bq-project",
        dataset: "test-dataset",
        tableNamePrefix: "test-prefix",
        schemaFiles: ["schema1.json", "schema2.json"],
        outputHelp: jest.fn(),
      };

      const mockSchemas = {
        schema1: { fields: { field1: { type: "string" } } },
        schema2: { fields: { field2: { type: "number" } } },
      };

      (parseProgram as jest.Mock).mockReturnValue(mockProgram);
      (validateNonInteractiveParams as jest.Mock).mockReturnValue(true);
      (readSchemas as jest.Mock).mockReturnValue(mockSchemas);

      const result = await parseConfig();

      expect(parseProgram).toHaveBeenCalled();
      expect(validateNonInteractiveParams).toHaveBeenCalledWith(mockProgram);
      expect(readSchemas).toHaveBeenCalledWith(mockProgram.schemaFiles);
      expect(result).toEqual({
        projectId: "test-project",
        bigQueryProjectId: "test-bq-project",
        datasetId: "test-dataset",
        tableNamePrefix: "test-prefix",
        schemas: mockSchemas,
        // parseConfig always fills in its default sample size (100). toEqual
        // ignores the optional fields that are undefined here, but it does not
        // ignore a defined property, so it has to be part of the expectation.
        agentSampleSize: 100,
      });
    });

    it("should use project as bigQueryProject if not specified", async () => {
      // Setup mocks with missing bigQueryProject
      const mockProgram = {
        nonInteractive: true,
        project: "test-project",
        bigQueryProject: undefined,
        dataset: "test-dataset",
        tableNamePrefix: "test-prefix",
        schemaFiles: ["schema.json"],
        outputHelp: jest.fn(),
      };

      const mockSchemas = { schema: { fields: { field: { type: "string" } } } };

      (parseProgram as jest.Mock).mockReturnValue(mockProgram);
      (validateNonInteractiveParams as jest.Mock).mockReturnValue(true);
      (readSchemas as jest.Mock).mockReturnValue(mockSchemas);

      const result = await parseConfig();

      expect(result.bigQueryProjectId).toBe("test-project");
    });

    it("should exit if required parameters are missing", async () => {
      const mockProgram = {
        nonInteractive: true,
        outputHelp: jest.fn(),
      };

      (parseProgram as jest.Mock).mockReturnValue(mockProgram);
      (validateNonInteractiveParams as jest.Mock).mockReturnValue(false);

      // process.exit is mocked to throw, so the promise rejects instead of
      // terminating the test process.
      await expect(parseConfig()).rejects.toThrow("Process exited with code 1");
      expect(mockProgram.outputHelp).toHaveBeenCalled();
      expect(mockExit).toHaveBeenCalledWith(1);
    });
  });

  describe("Interactive mode", () => {
    it("should return CLI config from inquirer prompts", async () => {
      // Setup mocks for interactive mode
      const mockProgram = {
        nonInteractive: false,
      };

      const mockPromptResponse = {
        project: "interactive-project",
        bigQueryProject: "interactive-bq-project",
        dataset: "interactive-dataset",
        tableNamePrefix: "interactive-prefix",
        schemaFiles: "schema1.json, schema2.json",
      };

      const mockSchemas = {
        schema1: { fields: { field1: { type: "string" } } },
        schema2: { fields: { field2: { type: "number" } } },
      };

      (parseProgram as jest.Mock).mockReturnValue(mockProgram);
      (promptInquirer as jest.Mock).mockResolvedValue(mockPromptResponse);
      (readSchemas as jest.Mock).mockReturnValue(mockSchemas);

      const result = await parseConfig();

      expect(parseProgram).toHaveBeenCalled();
      expect(promptInquirer).toHaveBeenCalled();
      expect(readSchemas).toHaveBeenCalledWith([
        "schema1.json",
        "schema2.json",
      ]);
      expect(result).toEqual({
        projectId: "interactive-project",
        bigQueryProjectId: "interactive-bq-project",
        datasetId: "interactive-dataset",
        tableNamePrefix: "interactive-prefix",
        schemas: mockSchemas,
        // The default sample size is set in interactive mode as well.
        agentSampleSize: 100,
      });
    });

    it("should properly trim and split schema file paths", async () => {
      const mockProgram = {
        nonInteractive: false,
      };

      const mockPromptResponse = {
        project: "test-project",
        bigQueryProject: "test-bq-project",
        dataset: "test-dataset",
        tableNamePrefix: "test-prefix",
        schemaFiles: " schema1.json,  schema2.json  , schema3.json",
      };

      (parseProgram as jest.Mock).mockReturnValue(mockProgram);
      (promptInquirer as jest.Mock).mockResolvedValue(mockPromptResponse);
      (readSchemas as jest.Mock).mockReturnValue({});

      await parseConfig();

      // Verify that file paths are properly trimmed and split
      expect(readSchemas).toHaveBeenCalledWith([
        "schema1.json",
        "schema2.json",
        "schema3.json",
      ]);
    });
  });
});
66 changes: 66 additions & 0 deletions
66
firestore-bigquery-export/scripts/gen-schema-view/src/config/index.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import { FirestoreSchema } from "../schema"; | ||
import { readSchemas } from "../schema-loader-utils"; | ||
import { promptInquirer } from "./interactive"; | ||
import { parseProgram, validateNonInteractiveParams } from "./non-interactive"; | ||
|
||
/** Value parseConfig assigns to `agentSampleSize` in every config it returns. */
const DEFAULT_SAMPLE_SIZE = 100;

/** Shape of the configuration object produced by parseConfig. */
interface CliConfig {
  /** Project ID taken from the `project` option or prompt answer. */
  projectId: string;
  /** Project ID used for BigQuery. */
  bigQueryProjectId: string;
  /** BigQuery dataset ID. */
  datasetId: string;
  /** Table name prefix. */
  tableNamePrefix: string;
  // TODO: isn't this the same as tableNamePrefix? check.
  collectionPath?: string;
  /** Schemas returned by readSchemas, keyed by schema name. */
  schemas: Record<string, FirestoreSchema>;
  /** Whether the Gemini option was selected. */
  useGemini?: boolean;
  // NOTE(review): presumably the number of documents sampled for Gemini — confirm.
  agentSampleSize?: number;
  /** Google AI API key supplied by the user. */
  googleAiKey?: string;
}
|
||
/**
 * Builds the CLI configuration from either command-line options or
 * interactive prompts.
 *
 * When the parsed program has `nonInteractive` set, the options are checked
 * with validateNonInteractiveParams; if that check fails the help text is
 * printed and the process exits with status 1. Otherwise the values come from
 * the answers resolved by promptInquirer, with the comma-separated
 * `schemaFiles` answer split and trimmed before it is handed to readSchemas.
 *
 * @returns The resolved configuration; `agentSampleSize` is always
 *   DEFAULT_SAMPLE_SIZE.
 */
export async function parseConfig(): Promise<CliConfig> {
  const program = parseProgram();

  if (!program.nonInteractive) {
    // Interactive mode: every value comes from the prompt answers.
    // NOTE(review): answers other than the ones read below are not copied
    // into the returned config — confirm that is intended.
    const answers = await promptInquirer();
    const schemaPaths = answers.schemaFiles
      .split(",")
      .map((rawPath) => rawPath.trim());

    return {
      projectId: answers.project,
      bigQueryProjectId: answers.bigQueryProject,
      datasetId: answers.dataset,
      tableNamePrefix: answers.tableNamePrefix,
      collectionPath: answers.collectionPath,
      schemas: readSchemas(schemaPaths),
      useGemini: answers.useGemini,
      agentSampleSize: DEFAULT_SAMPLE_SIZE,
      // TODO: rename?
      googleAiKey: answers.googleAiKey,
    };
  }

  // Non-interactive mode: refuse to continue when required options are missing.
  const hasRequiredParams = validateNonInteractiveParams(program);
  if (!hasRequiredParams) {
    program.outputHelp();
    process.exit(1);
  }

  return {
    projectId: program.project,
    // The BigQuery project falls back to the main project when not given.
    bigQueryProjectId: program.bigQueryProject || program.project,
    datasetId: program.dataset,
    tableNamePrefix: program.tableNamePrefix,
    collectionPath: program.collectionPath,
    schemas: readSchemas(program.schemaFiles),
    useGemini: program.useGemini,
    agentSampleSize: DEFAULT_SAMPLE_SIZE,
    googleAiKey: program.googleAiKey,
  };
}
97 changes: 97 additions & 0 deletions
97
firestore-bigquery-export/scripts/gen-schema-view/src/config/interactive.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
import inquirer from "inquirer"; | ||
|
||
/** One or more ASCII letters, digits or underscores. */
const BIGQUERY_VALID_CHARACTERS = /^[a-zA-Z0-9_]+$/;
/** Any non-empty value that contains no forward slash. */
const FIRESTORE_VALID_CHARACTERS = /^[^\/]+$/;
/** A lowercase letter followed by up to 29 lowercase letters, digits or hyphens. */
const GCP_PROJECT_VALID_CHARACTERS = /^[a-z][a-z0-9-]{0,29}$/;

/**
 * Validates a single prompt answer.
 *
 * @param value - The raw answer. Anything that is not a non-blank string is
 *   treated as missing rather than causing a runtime error.
 * @param name - Human-readable name of the field, used in the messages.
 * @param regex - Pattern the answer has to match.
 * @returns `true` when the answer is acceptable, otherwise the message to
 *   show to the user.
 */
const validateInput = (
  value: unknown,
  name: string,
  regex: RegExp
): string | true => {
  if (typeof value !== "string" || value.trim() === "") {
    return `Please supply a ${name}`;
  }
  if (!value.match(regex)) {
    // Report the actual pattern: the accepted characters differ per field, so
    // a fixed "letters or spaces" hint would be wrong for every caller.
    return `The ${name} contains invalid characters; it must match ${regex}`;
  }
  return true;
};
|
||
// Shared `when` predicate: the Gemini follow-up questions are only asked after
// the user has answered yes to `useGemini`.
const whenUsingGemini = (answers) => answers.useGemini;

/**
 * Question definitions handed to inquirer by promptInquirer, asked in the
 * order listed. Answers are keyed by each question's `name`.
 */
export const questions = [
  {
    name: "project",
    type: "input",
    message: "What is your Firebase project ID?",
    default: process.env.PROJECT_ID,
    // NOTE(review): this uses the Firestore pattern rather than
    // GCP_PROJECT_VALID_CHARACTERS — confirm that is deliberate.
    validate: (input) =>
      validateInput(input, "project ID", FIRESTORE_VALID_CHARACTERS),
  },
  {
    name: "bigQueryProject",
    type: "input",
    message:
      "What is your Google Cloud Project ID for BigQuery? (can be the same as the Firebase project ID)",
    default: process.env.PROJECT_ID,
    validate: (input) =>
      validateInput(input, "BigQuery project ID", GCP_PROJECT_VALID_CHARACTERS),
  },
  {
    name: "dataset",
    type: "input",
    message:
      "What is the ID of the BigQuery dataset the raw changelog lives in? (The dataset and the raw changelog must already exist!)",
    validate: (input) =>
      validateInput(input, "dataset ID", BIGQUERY_VALID_CHARACTERS),
  },
  {
    name: "tableNamePrefix",
    type: "input",
    message:
      "What is the name of the Cloud Firestore collection for which you want to generate a schema view?",
    validate: (input) =>
      validateInput(input, "table name prefix", BIGQUERY_VALID_CHARACTERS),
  },
  {
    // No validation: the answer is a free-form comma-separated list.
    name: "schemaFiles",
    type: "input",
    message:
      "Where should this script look for schema definitions? (Enter a comma-separated list of, optionally globbed, paths to files or directories).",
  },
  {
    // NOTE(review): the message reads "use a Gemini" — likely a typo.
    name: "useGemini",
    type: "confirm",
    message:
      "Would you like to use a Gemini to automatically analyze your data and generate a draft schema?",
    default: false,
  },
  // TODO: I dont think this is required as we have it above
  // TODO: can we make the questions conditional? if we select useGemini then dont ask about finding schema files?
  {
    name: "collectionPath",
    type: "input",
    message: "What is the Firestore collection path you want to analyze?",
    when: whenUsingGemini,
    validate: (input) =>
      validateInput(input, "collection path", FIRESTORE_VALID_CHARACTERS),
  },
  {
    name: "googleAiKey",
    type: "password",
    message: "Please provide your Google AI API Key:",
    when: whenUsingGemini,
    // Any non-blank value is accepted.
    validate: (input) =>
      !input || input.trim() === "" ? "Google AI API Key is required" : true,
  },
  {
    name: "schemaDirectory",
    type: "input",
    message: "Where should the generated schema files be stored?",
    when: whenUsingGemini,
    default: "./schemas",
  },
];
|
||
/**
 * Runs the interactive prompt session.
 *
 * @returns Whatever `inquirer.prompt` returns for the `questions` list.
 */
export const promptInquirer = () => inquirer.prompt(questions);
Oops, something went wrong.