-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e97f0e9
commit 398099c
Showing
6 changed files
with
337 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
#!/usr/bin/env node | ||
import uniq from 'lodash/uniq'; | ||
|
||
import yargs from 'yargs-parser'; | ||
import { logger } from './logger'; | ||
import colors from 'colors'; | ||
import { buildTranscendGraphQLClient } from './graphql'; | ||
import { DEFAULT_TRANSCEND_API } from './constants'; | ||
import { pullUnstructuredSubDataPointRecommendations } from './data-inventory'; | ||
import { writeCsv } from './cron'; | ||
import { splitCsvToList } from './requests'; | ||
import { DataCategoryType } from '@transcend-io/privacy-types'; | ||
|
||
/**
 * Pull unstructured subdatapoint recommendations from Transcend and write
 * them to a CSV file.
 *
 * Flags read from the command line:
 *   --auth                      API key (required, exits with code 1 if missing)
 *   --file                      CSV path to write, defaults to ./entries.csv
 *   --transcendUrl              API URL, defaults to DEFAULT_TRANSCEND_API
 *   --dataSiloIds               list parsed with splitCsvToList
 *   --parentCategories          list parsed with splitCsvToList, each entry
 *                               must be a DataCategoryType value
 *   --subCategories             list parsed with splitCsvToList
 *   --includeGuessedCategories  enabled only when the value is the string 'true'
 *
 * The process exits with code 1 on a missing API key, on an unknown parent
 * category, or when pulling/writing the entries fails.
 *
 * Dev Usage:
 * yarn ts-node ./src/cli-pull-entries.ts --auth=$TRANSCEND_API_KEY
 *
 * Standard usage
 * yarn cli-pull-entries --auth=$TRANSCEND_API_KEY
 */
async function main(): Promise<void> {
  // Parse command line arguments
  const {
    file = './entries.csv',
    transcendUrl = DEFAULT_TRANSCEND_API,
    auth,
    dataSiloIds = '',
    includeGuessedCategories = 'false',
    parentCategories = '',
    subCategories = '',
  } = yargs(process.argv.slice(2));

  // Ensure auth is passed
  if (!auth) {
    logger.error(
      colors.red(
        'A Transcend API key must be provided. You can specify using --auth=$TRANSCEND_API_KEY',
      ),
    );
    process.exit(1);
  }

  // Validate parentCategories against the known DataCategoryType values
  const allowedParentCategories = Object.values(DataCategoryType);
  const parsedParentCategories = splitCsvToList(
    parentCategories,
  ) as DataCategoryType[];
  const invalidParentCategories = parsedParentCategories.filter(
    (type) => !allowedParentCategories.includes(type),
  );
  if (invalidParentCategories.length > 0) {
    logger.error(
      colors.red(
        `Failed to parse parentCategories:"${invalidParentCategories.join(
          ',',
        )}".\n` + `Expected one of: \n${allowedParentCategories.join('\n')}`,
      ),
    );
    process.exit(1);
  }

  try {
    // Create a GraphQL client
    const client = buildTranscendGraphQLClient(transcendUrl, auth);

    const entries = await pullUnstructuredSubDataPointRecommendations(client, {
      dataSiloIds: splitCsvToList(dataSiloIds),
      includeGuessedCategories: includeGuessedCategories === 'true',
      parentCategories: parsedParentCategories,
      subCategories: splitCsvToList(subCategories), // TODO: https://transcend.height.app/T-40482 - do by name not ID
    });

    logger.info(colors.magenta(`Writing entries to file "${file}"...`));

    // Headers are collected from the keys of the rows themselves, so an empty
    // result set produces an empty header list.
    let headers: string[] = [];
    const inputs = entries.map((entry) => {
      const result = {
        'Property ID': entry.id,
        'Data Silo': entry.dataSiloId, // FIXME
        Object: entry.scannedObjectId, // FIXME
        'Object Path': entry.scannedObjectPathId, // FIXME
        Property: entry.name,
        // Each category is rendered as "<parent category>:<name>"
        'Data Categories': entry.categories
          .map((category) => `${category.category}:${category.name}`)
          .join(', '),
        // 'Guessed Category': entry.pendingCategoryGuesses?.[0]
        //   ? `${entry.pendingCategoryGuesses![0]!.category.category}:${
        //       entry.pendingCategoryGuesses![0]!.category.name
        //     }`
        //   : '',
      };
      headers = uniq([...headers, ...Object.keys(result)]);
      return result;
    });
    writeCsv(file, inputs, headers);
  } catch (err) {
    // A thrown value is not guaranteed to be an Error (or even an object), so
    // read `message` only when it is actually present.
    const message =
      typeof err === 'object' && err !== null && 'message' in err
        ? String((err as { message: unknown }).message)
        : String(err);
    logger.error(
      colors.red(`An error occurred syncing the entries: ${message}`),
    );
    process.exit(1);
  }

  // Indicate success
  logger.info(colors.green(`Successfully synced entries to disk at ${file}!`));
}

main();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
export * from './pullAllDatapoints'; | ||
export * from './pullUnstructuredSubDataPointRecommendations'; |
176 changes: 176 additions & 0 deletions
176
src/data-inventory/pullUnstructuredSubDataPointRecommendations.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
import cliProgress from 'cli-progress'; | ||
import { gql } from 'graphql-request'; | ||
import colors from 'colors'; | ||
import sortBy from 'lodash/sortBy'; | ||
import type { GraphQLClient } from 'graphql-request'; | ||
import type { DataCategoryInput } from '../codecs'; | ||
import type { UnstructuredSubDataPointRecommendationStatus } from '../enums'; | ||
import { SUB_DATA_POINTS_COUNT, makeGraphQLRequest } from '../graphql'; | ||
import { logger } from '../logger'; | ||
import type { DatapointFilterOptions } from './pullAllDatapoints'; | ||
|
||
/**
 * One unstructured subdatapoint recommendation, in the shape consumed by the
 * CSV export.
 *
 * NOTE(review): the export query in this file currently selects only `id`,
 * `name` and `categories`; confirm whether the remaining required fields are
 * populated at runtime.
 */
interface UnstructuredSubDataPointRecommendationCsvPreview {
  /** Identifier of the subdatapoint */
  id: string;
  /** Name (or key) of the subdatapoint */
  name: string;
  /** Personal data categories assigned to the subdatapoint */
  categories: DataCategoryInput[];
  /** Identifier of the scanned object */
  scannedObjectId: string;
  /** Identifier of the scanned object path */
  scannedObjectPathId: string;
  /** Identifier of the data silo */
  dataSiloId: string;
  /** Category guesses emitted by the classifier, when present */
  pendingCategoryGuesses?: Array<{
    /** The data category that was guessed */
    category: DataCategoryInput;
    /** Status of the recommendation */
    status: UnstructuredSubDataPointRecommendationStatus;
    /** Version of the classifier that produced the guess */
    classifierVersion: number;
  }>;
}
|
||
/**
 * Pull unstructured subdatapoint recommendations, one page at a time
 *
 * A count request is issued first and its result is used to size the progress
 * bar. Pages are then requested with `first`/`offset` until a page comes back
 * empty or shorter than `pageSize`. The combined results are returned sorted
 * by `name`.
 *
 * If a page request fails, the progress bar is stopped, the failing
 * cursor/offset is logged, and the original error is rethrown.
 *
 * @param client - Client to use for the request
 * @param options - Filters to apply (data silos, parent categories, sub categories) and the page size
 * @returns The recommendations matching the filters, sorted by name
 */
export async function pullUnstructuredSubDataPointRecommendations(
  client: GraphQLClient,
  {
    dataSiloIds = [],
    // includeGuessedCategories,
    parentCategories = [],
    subCategories = [],
    pageSize = 1000,
  }: DatapointFilterOptions & {
    /** Page size to pull in */
    pageSize?: number;
  } = {},
): Promise<UnstructuredSubDataPointRecommendationCsvPreview[]> {
  const unstructuredSubDataPointRecommendations: UnstructuredSubDataPointRecommendationCsvPreview[] =
    [];

  // Time duration
  const t0 = Date.now();

  // create a new progress bar instance and use shades_classic theme
  const progressBar = new cliProgress.SingleBar(
    {},
    cliProgress.Presets.shades_classic,
  );

  // Filters - a key is only included when its list is non-empty
  const filterBy = {
    ...(parentCategories.length > 0 ? { category: parentCategories } : {}),
    ...(subCategories.length > 0 ? { subCategoryIds: subCategories } : {}),
    // if parentCategories or subCategories and not includeGuessedCategories
    // ...(parentCategories.length + subCategories.length > 0 &&
    // !includeGuessedCategories
    //   ? // then only show data points with approved data categories
    //     // FIXME should include validated, corrected, manually added; should exclude classified and rejected
    //     { status: UnstructuredSubDataPointRecommendationStatus.Validated }
    //   : {}),
    ...(dataSiloIds.length > 0 ? { dataSilos: dataSiloIds } : {}),
  };

  // Fetch the total number of matches, used to size the progress bar
  const {
    unstructuredSubDataPointRecommendations: { totalCount },
  } = await makeGraphQLRequest<{
    /** Query response */
    unstructuredSubDataPointRecommendations: {
      /** Count */
      totalCount: number;
    };
  }>(client, SUB_DATA_POINTS_COUNT, {
    filterBy,
  });

  // The query document does not change between pages, so build it once
  // FIXME below incomplete
  const pageQuery = gql`
    query TranscendCliUnstructuredSubDataPointRecommendationCsvExport(
      $filterBy: SubDataPointFiltersInput
      $first: Int!
      $offset: Int!
    ) {
      unstructuredSubDataPointRecommendations(
        filterBy: $filterBy
        first: $first
        offset: $offset
        useMaster: false
      ) {
        nodes {
          id
          name
          categories {
            name
            category
          }
        }
      }
    }
  `;

  logger.info(colors.magenta('[Step 1/3] Pulling in all subdatapoints'));

  progressBar.start(totalCount, 0);
  let total = 0;
  let shouldContinue = false;
  // Only used to give context in the error message below
  let cursor: string | undefined;
  let offset = 0;
  do {
    try {
      const {
        unstructuredSubDataPointRecommendations: { nodes },
        // eslint-disable-next-line no-await-in-loop
      } = await makeGraphQLRequest<{
        /** Query response */
        unstructuredSubDataPointRecommendations: {
          /** List of matches */
          nodes: UnstructuredSubDataPointRecommendationCsvPreview[];
        };
      }>(client, pageQuery, {
        first: pageSize,
        offset,
        filterBy,
      });

      cursor = nodes[nodes.length - 1]?.id;
      unstructuredSubDataPointRecommendations.push(...nodes);
      // A full page means there may be more to fetch. The explicit empty-page
      // check keeps the loop from spinning forever when pageSize is 0.
      shouldContinue = nodes.length > 0 && nodes.length === pageSize;
      total += nodes.length;
      offset += nodes.length;
      progressBar.update(total);
    } catch (err) {
      // Stop the bar before logging so it is not left running while the
      // error propagates to the caller.
      progressBar.stop();
      logger.error(
        colors.red(
          `An error fetching subdatapoints for cursor ${cursor} and offset ${offset}`,
        ),
      );
      throw err;
    }
  } while (shouldContinue);

  progressBar.stop();
  const t1 = Date.now();
  const totalTime = t1 - t0;

  // sortBy returns a new array; the accumulated list is left untouched
  const sorted = sortBy(unstructuredSubDataPointRecommendations, 'name');

  logger.info(
    colors.green(
      `Successfully pulled in ${sorted.length} subdatapoints in ${
        totalTime / 1000
      } seconds!`,
    ),
  );
  return sorted;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters