Skip to content

Commit

Permalink
refactor(gen-schema-view): extract config parsing to their own module…
Browse files Browse the repository at this point in the history
…s and update gemini model import
  • Loading branch information
cabljac committed Mar 4, 2025
1 parent 39e704e commit 1d279cf
Show file tree
Hide file tree
Showing 6 changed files with 432 additions and 227 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
import { parseConfig } from "../../../src/config";
import { promptInquirer } from "../../../src/config/interactive";
import {
parseProgram,
validateNonInteractiveParams,
} from "../../../src/config/non-interactive";
import { readSchemas } from "../../../src/schema-loader-utils";

// Mock dependencies
// Each collaborator that parseConfig imports is replaced by a bare jest.fn()
// stub, so every test below decides what the CLI parser, the inquirer prompt
// and the schema loader return.
// NOTE(review): these rely on Jest hoisting jest.mock() above the imports at
// the top of the file — keep the `jest.mock(path, factory)` form intact.

// Interactive prompt: tests resolve it with a canned answers object.
jest.mock("../../../src/config/interactive", () => ({
promptInquirer: jest.fn(),
}));

// CLI argument parsing and validation for non-interactive mode.
jest.mock("../../../src/config/non-interactive", () => ({
parseProgram: jest.fn(),
validateNonInteractiveParams: jest.fn(),
}));

// Schema loader: no schema files are read from disk in these tests.
jest.mock("../../../src/schema-loader-utils", () => ({
readSchemas: jest.fn(),
}));

// Mock process.exit to prevent tests from actually exiting
// Throwing instead lets a test observe the exit as a rejected promise whose
// message carries the exit code.
const mockExit = jest.spyOn(process, "exit").mockImplementation((code) => {
throw new Error(`Process exited with code ${code}`);
});

// Test suite for parseConfig, covering both the non-interactive (CLI flags)
// and the interactive (inquirer prompts) code paths.
//
// parseConfig always sets `agentSampleSize` to its default sample size (100).
// `toEqual` only ignores properties whose value is `undefined`, so the
// full-object assertions below must list `agentSampleSize` explicitly; the
// optional Gemini-related fields are left undefined by these fixtures and are
// therefore ignored by `toEqual`.
describe("parseConfig", () => {
  beforeEach(() => {
    // Reset call history and configured return values between tests.
    jest.clearAllMocks();
  });

  describe("Non-interactive mode", () => {
    it("should return CLI config from command line arguments", async () => {
      // Setup mocks for non-interactive mode
      const mockProgram = {
        nonInteractive: true,
        project: "test-project",
        bigQueryProject: "test-bq-project",
        dataset: "test-dataset",
        tableNamePrefix: "test-prefix",
        schemaFiles: ["schema1.json", "schema2.json"],
        outputHelp: jest.fn(),
      };

      const mockSchemas = {
        schema1: { fields: { field1: { type: "string" } } },
        schema2: { fields: { field2: { type: "number" } } },
      };

      (parseProgram as jest.Mock).mockReturnValue(mockProgram);
      (validateNonInteractiveParams as jest.Mock).mockReturnValue(true);
      (readSchemas as jest.Mock).mockReturnValue(mockSchemas);

      const result = await parseConfig();

      expect(parseProgram).toHaveBeenCalled();
      expect(validateNonInteractiveParams).toHaveBeenCalledWith(mockProgram);
      expect(readSchemas).toHaveBeenCalledWith(mockProgram.schemaFiles);
      expect(result).toEqual({
        projectId: "test-project",
        bigQueryProjectId: "test-bq-project",
        datasetId: "test-dataset",
        tableNamePrefix: "test-prefix",
        schemas: mockSchemas,
        agentSampleSize: 100,
      });
    });

    it("should use project as bigQueryProject if not specified", async () => {
      // Setup mocks with missing bigQueryProject
      const mockProgram = {
        nonInteractive: true,
        project: "test-project",
        bigQueryProject: undefined,
        dataset: "test-dataset",
        tableNamePrefix: "test-prefix",
        schemaFiles: ["schema.json"],
        outputHelp: jest.fn(),
      };

      const mockSchemas = { schema: { fields: { field: { type: "string" } } } };

      (parseProgram as jest.Mock).mockReturnValue(mockProgram);
      (validateNonInteractiveParams as jest.Mock).mockReturnValue(true);
      (readSchemas as jest.Mock).mockReturnValue(mockSchemas);

      const result = await parseConfig();

      expect(result.bigQueryProjectId).toBe("test-project");
    });

    it("should exit if required parameters are missing", async () => {
      const mockProgram = {
        nonInteractive: true,
        outputHelp: jest.fn(),
      };

      (parseProgram as jest.Mock).mockReturnValue(mockProgram);
      (validateNonInteractiveParams as jest.Mock).mockReturnValue(false);

      // The mocked process.exit throws, which surfaces as a rejected promise.
      await expect(parseConfig()).rejects.toThrow("Process exited with code 1");
      expect(mockProgram.outputHelp).toHaveBeenCalled();
      expect(mockExit).toHaveBeenCalledWith(1);
    });
  });

  describe("Interactive mode", () => {
    it("should return CLI config from inquirer prompts", async () => {
      // Setup mocks for interactive mode
      const mockProgram = {
        nonInteractive: false,
      };

      const mockPromptResponse = {
        project: "interactive-project",
        bigQueryProject: "interactive-bq-project",
        dataset: "interactive-dataset",
        tableNamePrefix: "interactive-prefix",
        schemaFiles: "schema1.json, schema2.json",
      };

      const mockSchemas = {
        schema1: { fields: { field1: { type: "string" } } },
        schema2: { fields: { field2: { type: "number" } } },
      };

      (parseProgram as jest.Mock).mockReturnValue(mockProgram);
      (promptInquirer as jest.Mock).mockResolvedValue(mockPromptResponse);
      (readSchemas as jest.Mock).mockReturnValue(mockSchemas);

      const result = await parseConfig();

      expect(parseProgram).toHaveBeenCalled();
      expect(promptInquirer).toHaveBeenCalled();
      expect(readSchemas).toHaveBeenCalledWith([
        "schema1.json",
        "schema2.json",
      ]);
      expect(result).toEqual({
        projectId: "interactive-project",
        bigQueryProjectId: "interactive-bq-project",
        datasetId: "interactive-dataset",
        tableNamePrefix: "interactive-prefix",
        schemas: mockSchemas,
        agentSampleSize: 100,
      });
    });

    it("should properly trim and split schema file paths", async () => {
      const mockProgram = {
        nonInteractive: false,
      };

      const mockPromptResponse = {
        project: "test-project",
        bigQueryProject: "test-bq-project",
        dataset: "test-dataset",
        tableNamePrefix: "test-prefix",
        schemaFiles: " schema1.json, schema2.json , schema3.json",
      };

      (parseProgram as jest.Mock).mockReturnValue(mockProgram);
      (promptInquirer as jest.Mock).mockResolvedValue(mockPromptResponse);
      (readSchemas as jest.Mock).mockReturnValue({});

      await parseConfig();

      // Verify that file paths are properly trimmed and split
      expect(readSchemas).toHaveBeenCalledWith([
        "schema1.json",
        "schema2.json",
        "schema3.json",
      ]);
    });
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { FirestoreSchema } from "../schema";
import { readSchemas } from "../schema-loader-utils";
import { promptInquirer } from "./interactive";
import { parseProgram, validateNonInteractiveParams } from "./non-interactive";

// Value parseConfig assigns to `agentSampleSize` in both modes.
// NOTE(review): presumably the number of documents sampled for the Gemini
// schema draft — confirm against the code that consumes agentSampleSize.
const DEFAULT_SAMPLE_SIZE = 100;

// Resolved configuration returned by parseConfig, whether the values came
// from command-line options or from the interactive prompts.
interface CliConfig {
// From the `project` option / prompt answer.
projectId: string;
// From the `bigQueryProject` option / prompt answer; in non-interactive mode
// it falls back to the project when not supplied.
bigQueryProjectId: string;
// From the `dataset` option / prompt answer.
datasetId: string;
// From the `tableNamePrefix` option / prompt answer.
tableNamePrefix: string;
// TODO: isn't this the same as tableNamePrefix? check.
collectionPath?: string;
// Result of readSchemas() for the requested schema files, keyed by schema name.
schemas: { [schemaName: string]: FirestoreSchema };
// From the `useGemini` option / prompt answer.
useGemini?: boolean;
// Always DEFAULT_SAMPLE_SIZE when produced by parseConfig.
agentSampleSize?: number;
// From the `googleAiKey` option / prompt answer.
googleAiKey?: string;
}

/**
 * Splits the comma-separated schema file answer from the interactive prompt
 * into individual, trimmed entries.
 *
 * Blank entries (an empty answer, a trailing comma, or ", ,") are dropped so
 * that the schema loader is never handed an empty path.
 */
function splitSchemaFileList(schemaFiles: string): string[] {
  return (schemaFiles || "")
    .split(",")
    .map((schemaFileName) => schemaFileName.trim())
    .filter((schemaFileName) => schemaFileName !== "");
}

/**
 * Builds the CLI configuration, either from command-line options
 * (non-interactive mode) or from the answers to the interactive prompts.
 *
 * In non-interactive mode, when `validateNonInteractiveParams` rejects the
 * parsed options, the help text is printed and the process exits with code 1.
 *
 * In both modes the BigQuery project falls back to the Firebase project when
 * it is not supplied, and `agentSampleSize` is set to `DEFAULT_SAMPLE_SIZE`.
 *
 * @returns The resolved configuration, including the schemas loaded through
 *   `readSchemas`.
 */
export async function parseConfig(): Promise<CliConfig> {
  const program = parseProgram();

  if (program.nonInteractive) {
    if (!validateNonInteractiveParams(program)) {
      program.outputHelp();
      process.exit(1);
    }

    return {
      projectId: program.project,
      bigQueryProjectId: program.bigQueryProject || program.project,
      datasetId: program.dataset,
      tableNamePrefix: program.tableNamePrefix,
      collectionPath: program.collectionPath,
      schemas: readSchemas(program.schemaFiles),
      useGemini: program.useGemini,
      agentSampleSize: DEFAULT_SAMPLE_SIZE,
      googleAiKey: program.googleAiKey,
    };
  }

  const {
    project,
    bigQueryProject,
    dataset,
    tableNamePrefix,
    schemaFiles,
    collectionPath,
    useGemini,
    // TODO: rename?
    googleAiKey,
  } = await promptInquirer();

  return {
    projectId: project,
    // Same fallback as the non-interactive branch, for consistency.
    bigQueryProjectId: bigQueryProject || project,
    datasetId: dataset,
    tableNamePrefix: tableNamePrefix,
    collectionPath: collectionPath,
    schemas: readSchemas(splitSchemaFileList(schemaFiles)),
    useGemini: useGemini,
    agentSampleSize: DEFAULT_SAMPLE_SIZE,
    googleAiKey: googleAiKey,
  };
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import inquirer from "inquirer";

// One or more ASCII letters, digits or underscores.
const BIGQUERY_VALID_CHARACTERS = /^[a-zA-Z0-9_]+$/;
// One or more characters, none of which is a forward slash.
const FIRESTORE_VALID_CHARACTERS = /^[^\/]+$/;
// A lowercase letter followed by up to 29 lowercase letters, digits or
// hyphens (1 to 30 characters in total).
const GCP_PROJECT_VALID_CHARACTERS = /^[a-z][a-z0-9-]{0,29}$/;

/**
 * Validates a prompt answer against a pattern, in the shape inquirer expects
 * from a `validate` callback: `true` when valid, otherwise an error message.
 *
 * @param value - The raw answer. Anything that is not a non-blank string is
 *   treated as missing (rather than throwing on `.trim()`).
 * @param name - Human-readable field name used in the error messages.
 * @param regex - Pattern the answer must match.
 * @returns `true` if the answer is acceptable, otherwise a message to show.
 */
const validateInput = (value: unknown, name: string, regex: RegExp) => {
  if (typeof value !== "string" || value.trim() === "") {
    return `Please supply a ${name}`;
  }
  if (value.match(regex) === null) {
    // Report the actual rule: the previous "letters or spaces" wording did not
    // describe any of the patterns this helper is used with.
    return `The ${name} is not valid; it must match the pattern ${regex}`;
  }
  return true;
};

// Builds a `validate` callback that checks an answer with validateInput,
// using the given field label and pattern.
const validatorFor = (label: string, pattern: RegExp) => (value) =>
  validateInput(value, label, pattern);

// `when` predicate for the follow-up questions that are only asked after the
// user opted in to Gemini.
const askedOnlyWithGemini = (answers) => answers.useGemini;

// Question list handed to inquirer by promptInquirer. The `name` of each entry
// is the key under which its answer is returned.
export const questions = [
  {
    name: "project",
    type: "input",
    message: "What is your Firebase project ID?",
    default: process.env.PROJECT_ID,
    validate: validatorFor("project ID", FIRESTORE_VALID_CHARACTERS),
  },
  {
    name: "bigQueryProject",
    type: "input",
    message:
      "What is your Google Cloud Project ID for BigQuery? (can be the same as the Firebase project ID)",
    default: process.env.PROJECT_ID,
    validate: validatorFor("BigQuery project ID", GCP_PROJECT_VALID_CHARACTERS),
  },
  {
    name: "dataset",
    type: "input",
    message:
      "What is the ID of the BigQuery dataset the raw changelog lives in? (The dataset and the raw changelog must already exist!)",
    validate: validatorFor("dataset ID", BIGQUERY_VALID_CHARACTERS),
  },
  {
    name: "tableNamePrefix",
    type: "input",
    message:
      "What is the name of the Cloud Firestore collection for which you want to generate a schema view?",
    validate: validatorFor("table name prefix", BIGQUERY_VALID_CHARACTERS),
  },
  {
    // Free-form answer: no validation is applied to this one.
    name: "schemaFiles",
    type: "input",
    message:
      "Where should this script look for schema definitions? (Enter a comma-separated list of, optionally globbed, paths to files or directories).",
  },
  {
    name: "useGemini",
    type: "confirm",
    message:
      "Would you like to use a Gemini to automatically analyze your data and generate a draft schema?",
    default: false,
  },
  // TODO: I don't think this is required as we have it above
  // TODO: can we make the questions conditional? if we select useGemini then don't ask about finding schema files?
  {
    name: "collectionPath",
    type: "input",
    message: "What is the Firestore collection path you want to analyze?",
    when: askedOnlyWithGemini,
    validate: validatorFor("collection path", FIRESTORE_VALID_CHARACTERS),
  },
  {
    name: "googleAiKey",
    type: "password",
    message: "Please provide your Google AI API Key:",
    when: askedOnlyWithGemini,
    // Only presence is checked; blank or whitespace-only keys are rejected.
    validate: (value) =>
      !value || value.trim() === "" ? "Google AI API Key is required" : true,
  },
  {
    name: "schemaDirectory",
    type: "input",
    message: "Where should the generated schema files be stored?",
    when: askedOnlyWithGemini,
    default: "./schemas",
  },
];

/**
 * Presents the `questions` list through inquirer and returns whatever
 * `inquirer.prompt` returns for it (the collected answers, asynchronously).
 */
export const promptInquirer = () => inquirer.prompt(questions);
Loading

0 comments on commit 1d279cf

Please sign in to comment.