Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
CorieW committed Mar 5, 2025
1 parent 39efe53 commit a9165ca
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,33 @@ describe("parseConfig", () => {
expect(result.bigQueryProjectId).toBe("test-project");
});

it("should use gemini if specified", async () => {
  // TODO: This test still needs to be completed with further Gemini cases.
  // Non-interactive program options with Gemini enabled and every
  // Gemini-related option supplied.
  const mockProgram = {
    nonInteractive: true,
    project: "test-project",
    bigQueryProject: "test-bq-project",
    dataset: "test-dataset",
    tableNamePrefix: "test-prefix",
    schemaFiles: ["schema.json"],
    useGemini: true,
    googleAiKey: "test-key",
    geminiAnalyzeCollectionPath: "test-collection",
    schemaDirectory: "test-directory",
    outputHelp: jest.fn(),
  };

  (parseProgram as jest.Mock).mockReturnValue(mockProgram);
  (validateNonInteractiveParams as jest.Mock).mockReturnValue(true);

  const result = await parseConfig();

  // The Gemini options must be passed through to the resulting config.
  expect(result.useGemini).toBe(true);
  expect(result.googleAiKey).toBe("test-key");
  expect(result.geminiAnalyzeCollectionPath).toBe("test-collection");
  expect(result.schemaDirectory).toBe("test-directory");

  // With useGemini set, parseConfig uses an empty schema map instead of the
  // result of readSchemas, even though schemaFiles was supplied.
  expect(result.schemas).toEqual({});
});

it("should exit if required parameters are missing", async () => {
const mockProgram = {
nonInteractive: true,
Expand All @@ -104,7 +131,7 @@ describe("parseConfig", () => {
});
});

describe("Interactive mode", () => {
describe("Interactive mode without Gemini", () => {
it("should return CLI config from inquirer prompts", async () => {
// Setup mocks for interactive mode
const mockProgram = {
Expand All @@ -116,6 +143,7 @@ describe("parseConfig", () => {
bigQueryProject: "interactive-bq-project",
dataset: "interactive-dataset",
tableNamePrefix: "interactive-prefix",
useGemini: false,
schemaFiles: "schema1.json, schema2.json",
};

Expand Down Expand Up @@ -155,6 +183,7 @@ describe("parseConfig", () => {
bigQueryProject: "test-bq-project",
dataset: "test-dataset",
tableNamePrefix: "test-prefix",
useGemini: false,
schemaFiles: " schema1.json, schema2.json , schema3.json",
};

Expand All @@ -172,4 +201,49 @@ describe("parseConfig", () => {
]);
});
});

describe("Interactive mode with Gemini", () => {
  it("should return CLI config from inquirer prompts", async () => {
    // Interactive mode: the program options carry nothing but the mode flag,
    // everything else comes from the prompt answers.
    const mockProgram = {
      nonInteractive: false,
    };

    // Prompt answers for a Gemini run. No schemaFiles answer is provided.
    const mockPromptResponse = {
      project: "interactive-project",
      bigQueryProject: "interactive-bq-project",
      dataset: "interactive-dataset",
      tableNamePrefix: "interactive-prefix",
      useGemini: true,
      googleAiKey: "test-key",
      geminiAnalyzeCollectionPath: "test-collection",
      schemaDirectory: "test-directory",
    };

    // Deliberately non-empty: if parseConfig used readSchemas in Gemini mode,
    // result.schemas would equal this object rather than {}.
    const mockSchemas = {
      schema1: { fields: { field1: { type: "string" } } },
      schema2: { fields: { field2: { type: "number" } } },
    };

    (parseProgram as jest.Mock).mockReturnValue(mockProgram);
    (promptInquirer as jest.Mock).mockResolvedValue(mockPromptResponse);
    (readSchemas as jest.Mock).mockReturnValue(mockSchemas);

    const result = await parseConfig();

    expect(parseProgram).toHaveBeenCalled();
    expect(promptInquirer).toHaveBeenCalled();

    // In Gemini mode parseConfig returns an empty schema map.
    expect(result.schemas).toEqual({});

    // toMatchObject rather than toEqual: the config also carries
    // agentSampleSize, which comes from DEFAULT_SAMPLE_SIZE and is not pinned
    // to a specific value here.
    expect(result).toMatchObject({
      projectId: "interactive-project",
      bigQueryProjectId: "interactive-bq-project",
      datasetId: "interactive-dataset",
      tableNamePrefix: "interactive-prefix",
      useGemini: true,
      googleAiKey: "test-key",
      geminiAnalyzeCollectionPath: "test-collection",
      schemaDirectory: "test-directory",
    });
  });
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ export interface CliConfig {
tableNamePrefix: string;
schemas: { [schemaName: string]: FirestoreSchema };
useGemini?: boolean;
geminiAnalyzeCollectionPath?: string;
agentSampleSize?: number;
googleAiKey?: string;
schemaDirectory?: string;
}

export async function parseConfig(): Promise<CliConfig> {
Expand All @@ -33,8 +35,10 @@ export async function parseConfig(): Promise<CliConfig> {
tableNamePrefix: program.tableNamePrefix,
useGemini: program.useGemini,
schemas: !program.useGemini ? readSchemas(program.schemaFiles) : {},
geminiAnalyzeCollectionPath: program.geminiAnalyzeCollectionPath,
agentSampleSize: DEFAULT_SAMPLE_SIZE,
googleAiKey: program.googleAiKey,
schemaDirectory: program.schemaDirectory,
};
}
const {
Expand All @@ -44,20 +48,23 @@ export async function parseConfig(): Promise<CliConfig> {
tableNamePrefix,
schemaFiles,
useGemini,
// TODO: rename?
geminiAnalyzeCollectionPath,
googleAiKey,
schemaDirectory,
} = await promptInquirer();

return {
projectId: project,
bigQueryProjectId: bigQueryProject,
datasetId: dataset,
tableNamePrefix: tableNamePrefix,
tableNamePrefix,
schemas: !useGemini ? readSchemas(
schemaFiles.split(",").map((schemaFileName) => schemaFileName.trim())
) : {},
useGemini: useGemini,
useGemini,
geminiAnalyzeCollectionPath,
agentSampleSize: DEFAULT_SAMPLE_SIZE,
googleAiKey: googleAiKey,
googleAiKey,
schemaDirectory,
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export const questions = [
{
message: "What is your Firebase project ID?",
name: "project",
default: process.env.PROJECT_ID,
default: "dev-extensions-testing",
type: "input",
validate: (value) =>
validateInput(value, "project ID", FIRESTORE_VALID_CHARACTERS),
Expand All @@ -27,7 +27,7 @@ export const questions = [
message:
"What is your Google Cloud Project ID for BigQuery? (can be the same as the Firebase project ID)",
name: "bigQueryProject",
default: process.env.PROJECT_ID,
default: "dev-extensions-testing",
type: "input",
validate: (value) =>
validateInput(value, "BigQuery project ID", GCP_PROJECT_VALID_CHARACTERS),
Expand All @@ -37,23 +37,26 @@ export const questions = [
"What is the ID of the BigQuery dataset the raw changelog lives in? (The dataset and the raw changelog must already exist!)",
name: "dataset",
type: "input",
default: "2025_stress_test",
validate: (value) =>
validateInput(value, "dataset ID", BIGQUERY_VALID_CHARACTERS),
},
{
message:
"What is the name of the Cloud Firestore collection for which you want to generate a schema view?",
"What prefix should be used for the names of the views generated by this script?",
name: "tableNamePrefix",
type: "input",
default: "2025_stress_test",
validate: (value) =>
validateInput(value, "table name prefix", BIGQUERY_VALID_CHARACTERS),
requiredOption: false,
},
{
message:
"Would you like to use a Gemini to automatically analyze your data and generate a draft schema?",
name: "useGemini",
type: "confirm",
default: false,
default: true,
},
{
message:
Expand All @@ -62,20 +65,11 @@ export const questions = [
type: "input",
when: (answers) => !answers.useGemini,
},
// TODO: I don't think this is required, as we have it above.
// TODO: Can we make the questions conditional? If we select useGemini, then don't ask about finding schema files.
// {
// message: "What is the Firestore collection path you want to analyze?",
// name: "collectionPath",
// type: "input",
// when: (answers) => answers.useGemini,
// validate: (value) =>
// validateInput(value, "collection path", FIRESTORE_VALID_CHARACTERS),
// },
{
message: "Please provide your Google AI API Key:",
name: "googleAiKey",
type: "password",
default: "AIzaSyAv_SeZkZCo_qVjrysxxtasHf6sN5yG9wg",
when: (answers) => answers.useGemini,
validate: (value) => {
if (!value || value.trim() === "") {
Expand All @@ -84,6 +78,14 @@ export const questions = [
return true;
},
},
{
message: "What is the Firestore collection path you want Gemini to analyze?",
name: "geminiAnalyzeCollectionPath",
type: "input",
when: (answers) => answers.useGemini,
validate: (value) =>
validateInput(value, "collection path", FIRESTORE_VALID_CHARACTERS),
},
{
message: "Where should the generated schema files be stored?",
name: "schemaDirectory",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ export const configureProgram = () => {
false
)
.option(
"-c, --collection-path <path>",
"-c, --gemini-analyze-collection-path <path>",
"Firestore collection path for Gemini to analyze"
)
.option(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@ import type { CliConfig } from "../config";
import firebase = require("firebase-admin");
import { genkit, z } from "genkit";
import { googleAI, gemini20Flash } from "@genkit-ai/googleai";
import * as fs from "fs/promises";
import * as fs from "fs";
import * as path from "path";
import inquirer from "inquirer";
import {SchemaSchema} from './genkitSchema'

export async function sampleFirestoreDocuments(
collectionPath: string,
Expand All @@ -25,7 +24,6 @@ export async function sampleFirestoreDocuments(
return serializeDocument(data);
});

console.log(`Successfully sampled ${documents.length} documents.`);
return documents;
} catch (error) {
console.error("Error sampling documents:", error);
Expand Down Expand Up @@ -67,44 +65,19 @@ function serializeDocument(data: any): any {
return data;
}

/**
 * Writes a schema file to the specified directory if it does not already exist.
 *
 * The file is opened with the exclusive-create flag (`wx`), so the existence
 * check and the write happen in a single file-system call. This avoids the
 * window between a separate "does it exist?" check and the write, and keeps
 * unrelated failures from being mistaken for "file is missing".
 *
 * @param schemaDirectory - The directory where schema files are stored.
 * @param fileName - The name of the schema file to write.
 * @param content - The content of the schema file as a JSON string.
 * @returns "Schema created successfully" when the file was written, or
 *   "Error: Schema file already exists" when the path is already taken.
 * @throws Any other file-system error (for example a missing directory) is
 *   rethrown unchanged.
 */
const writeSchemaFile = async (
  schemaDirectory: string,
  fileName: string,
  content: string
): Promise<string> => {
  const filePath = path.join(schemaDirectory, fileName);
  try {
    // "wx" fails with EEXIST instead of overwriting an existing file.
    await fs.writeFile(filePath, content, { flag: "wx" });
    return "Schema created successfully";
  } catch (error: unknown) {
    const code =
      typeof error === "object" && error !== null
        ? (error as { code?: unknown }).code
        : undefined;
    if (code === "EEXIST") {
      return "Error: Schema file already exists";
    }
    throw error;
  }
};

const biqquerySchemaPrompt = ({
collectionName,
collectionPath,
sampleData,
tablePrefix,
}: {
collectionName: string;
collectionPath: string;
sampleData: any[];
tablePrefix: string;
}) => `
You are a Schema Management Agent for Generating BigQuery schemas from Firestore Collections.
Your primary tasks are:
1. Analyze the provided sample documents
2. Generate an appropriate BigQuery schema
I will provide you with sample documents from the collection "${collectionName}".
I will provide you with sample documents from the collection "${collectionPath}".
Here are the sample documents to analyze:
${JSON.stringify(sampleData, null, 2)}
Expand Down Expand Up @@ -194,14 +167,19 @@ const biqquerySchemaPrompt = ({
export const generateSchemaFilesWithGemini = async (config: CliConfig) => {
// get sample data from Firestore
const sampleData = await sampleFirestoreDocuments(
config.tableNamePrefix!,
config.geminiAnalyzeCollectionPath!,
config.agentSampleSize!
);

if (sampleData.length === 0) {
console.log("Operation cancelled. No sample data found. Either the collection is empty or the collection path is incorrect.");
process.exit(0);
}
console.log(`Successfully sampled ${sampleData.length} documents from collection ${config.geminiAnalyzeCollectionPath}`);

const prompt = biqquerySchemaPrompt({
collectionName: config.tableNamePrefix!,
collectionPath: config.geminiAnalyzeCollectionPath!,
sampleData,
tablePrefix: config.tableNamePrefix,
});

// initialize genkit with googleAI plugin
Expand All @@ -218,12 +196,49 @@ export const generateSchemaFilesWithGemini = async (config: CliConfig) => {
model: gemini20Flash,
prompt,
output: {
format: 'json',
schema: SchemaSchema
format: "json",
schema: z.object({
fields: z.array(z.object({
name: z.string(),
type: z.string(),
description: z.string(),
fields: z.array(z.object({
name: z.string(),
type: z.string(),
description: z.string(),
fields: z.array(z.object({
name: z.string(),
type: z.string(),
description: z.string(),
column_name: z.string().optional(),
})),
})),
})),
})
}});

const filePath = path.join(config.schemaDirectory, `${config.tableNamePrefix}.json`);

// Check if a file exists
if (fs.existsSync(filePath)) {
const overwriteConfirm = await inquirer.prompt([
{
type: "confirm",
name: "proceed",
message:
"Schema file already exists. Would you like to overwrite it?",
default: false,
},
]);

if (!overwriteConfirm.proceed) {
console.log("Operation cancelled. Please choose a different schema file name.");
process.exit(0);
}
});

await writeSchemaFile("./schemas", `${config.tableNamePrefix}.json`, text);
await fs.promises.writeFile(filePath, text);
}

// confirm with user that schema file is correct
const confirmation = await inquirer.prompt([
{
Expand Down

0 comments on commit a9165ca

Please sign in to comment.