diff --git a/weave-js/package.json b/weave-js/package.json index ff7cd60f6166..dee07b087a9a 100644 --- a/weave-js/package.json +++ b/weave-js/package.json @@ -63,6 +63,7 @@ "@testing-library/jest-dom": "6.4.5", "@testing-library/react": "11.2.3", "@testing-library/user-event": "14.4.3", + "@types/papaparse": "^5.3.15", "@types/query-string": "^6.3.0", "@types/react-hook-mousetrap": "^2.0.2", "@types/react-syntax-highlighter": "^15.5.7", @@ -97,6 +98,7 @@ "numeral": "^2.0.6", "onchange": "^7.1.0", "pako": "^2.1.0", + "papaparse": "^5.5.2", "pca-js": "^1.0.2", "plotly.js": "^2.23.2", "plotly.js-dist-min": "^2.6.3", diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/CellRenderers.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/CellRenderers.tsx index d705a1df0541..952da0baeeea 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/CellRenderers.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/CellRenderers.tsx @@ -44,6 +44,7 @@ interface CellViewingRendererProps { isNew?: boolean; isEditing?: boolean; serverValue?: any; + disableNewRowHighlight?: boolean; } export const CellViewingRenderer: React.FC< @@ -58,6 +59,7 @@ export const CellViewingRenderer: React.FC< id, field, serverValue, + disableNewRowHighlight = false, }) => { const [isHovered, setIsHovered] = useState(false); const {setEditedRows, setAddedRows, setFieldEdited} = useDatasetEditContext(); @@ -98,7 +100,7 @@ export const CellViewingRenderer: React.FC< if (isEdited) { return CELL_COLORS.EDITED; } - if (isNew) { + if (isNew && !disableNewRowHighlight) { return CELL_COLORS.NEW; } return CELL_COLORS.TRANSPARENT; @@ -636,6 +638,7 @@ export interface ControlCellProps { isDeleted: boolean; isNew: boolean; hideRemoveForAddedRows?: boolean; + disableNewRowHighlight?: boolean; } export const ControlCell: React.FC = ({ @@ -646,6 +649,7 @@ export const ControlCell: React.FC = ({ isDeleted, isNew, 
hideRemoveForAddedRows, + disableNewRowHighlight = false, }) => { const rowId = params.id as string; const rowIndex = params.row.___weave?.index; @@ -660,7 +664,9 @@ export const ControlCell: React.FC = ({ justifyContent: 'center', height: '100%', width: '100%', - backgroundColor: CELL_COLORS.NEW, + backgroundColor: disableNewRowHighlight + ? CELL_COLORS.TRANSPARENT + : CELL_COLORS.NEW, }} /> ); @@ -676,7 +682,7 @@ export const ControlCell: React.FC = ({ width: '100%', backgroundColor: isDeleted ? CELL_COLORS.DELETED - : isNew + : isNew && !disableNewRowHighlight ? CELL_COLORS.NEW : CELL_COLORS.TRANSPARENT, opacity: isDeleted ? DELETED_CELL_STYLES.opacity : 1, diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/CreateDatasetDrawer.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/CreateDatasetDrawer.tsx new file mode 100644 index 000000000000..b4da700cccc0 --- /dev/null +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/CreateDatasetDrawer.tsx @@ -0,0 +1,390 @@ +import {Box, Typography} from '@mui/material'; +import React, {useCallback, useRef, useState} from 'react'; +import {toast} from 'react-toastify'; + +import {GLOBAL_COLORS} from '../../../../../common/util/colors'; +import {Button} from '../../../../Button'; +import {TextField} from '../../../../Form/TextField'; +import {WaveLoader} from '../../../../Loaders/WaveLoader'; +import {ResizableDrawer} from '../pages/common/ResizableDrawer'; +import { + CREATE_DATASET_ACTIONS, + CreateDatasetProvider, + useCreateDatasetContext, +} from './CreateDatasetDrawerContext'; +import {validateDatasetName} from './datasetNameValidation'; +import {EditableDatasetView} from './EditableDatasetView'; + +// Define typography style with Source Sans Pro font +const typographyStyle = {fontFamily: 'Source Sans Pro'}; + +interface CreateDatasetDrawerProps { + open: boolean; + onClose: () => void; + onSaveDataset: (dataset: any) => void; + isCreating?: boolean; +} 
+ +export const CreateDatasetDrawer: React.FC = ({ + open, + onClose, + onSaveDataset, + isCreating = false, +}) => { + return ( + + + + ); +}; + +const CreateDatasetDrawerContent: React.FC<{ + open: boolean; + onClose: () => void; + isCreating?: boolean; +}> = ({open, onClose, isCreating = false}) => { + const { + state, + dispatch, + parseCSVFile, + handleCloseDrawer, + handlePublishDataset, + clearDataset, + } = useCreateDatasetContext(); + + const {datasetName, parsedData, isLoading, error, drawerWidth, isFullscreen} = + state; + + const fileInputRef = useRef(null); + const [isDragging, setIsDragging] = useState(false); + const [nameError, setNameError] = useState(null); + + const handleNameChange = useCallback( + (value: string) => { + dispatch({ + type: CREATE_DATASET_ACTIONS.SET_DATASET_NAME, + payload: value, + }); + + const validationResult = validateDatasetName(value); + setNameError(validationResult.error); + }, + [dispatch] + ); + + const handleFileChange = useCallback( + async (event: React.ChangeEvent) => { + const file = event.target.files?.[0]; + if (file) { + await parseCSVFile(file); + } + }, + [parseCSVFile] + ); + + const handleUploadClick = useCallback(() => { + fileInputRef.current?.click(); + }, []); + + const wrappedOnClose = useCallback(() => { + handleCloseDrawer(); + onClose(); + }, [handleCloseDrawer, onClose]); + + // Handle drag events + const handleDragEnter = useCallback((e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + setIsDragging(true); + }, []); + + const handleDragOver = useCallback((e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + }, []); + + const handleDragLeave = useCallback((e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + setIsDragging(false); + }, []); + + const handleDrop = useCallback( + async (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + setIsDragging(false); + + const files = e.dataTransfer.files; + if (files.length > 0) { 
+ const file = files[0]; + if (file.type === 'text/csv' || file.name.endsWith('.csv')) { + await parseCSVFile(file); + } else { + toast.error('Please upload a CSV file'); + } + } + }, + [parseCSVFile] + ); + + const handleToggleFullscreen = useCallback(() => { + dispatch({ + type: CREATE_DATASET_ACTIONS.SET_IS_FULLSCREEN, + payload: !isFullscreen, + }); + }, [dispatch, isFullscreen]); + + const handleClearDataset = useCallback(() => { + clearDataset(); + }, [clearDataset]); + + return ( + + !isFullscreen && + dispatch({ + type: CREATE_DATASET_ACTIONS.SET_DRAWER_WIDTH, + payload: width, + }) + } + headerContent={ + + + Create New Dataset + + + {parsedData && ( + + + {isDragging && ( + + Release to upload + + )} + + ) : ( + + + + )} + + + {/* Publish button at the bottom */} + {parsedData && ( + + + + )} + + )} + + + ); +}; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/CreateDatasetDrawerContext.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/CreateDatasetDrawerContext.tsx new file mode 100644 index 000000000000..65dc69f72c31 --- /dev/null +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/CreateDatasetDrawerContext.tsx @@ -0,0 +1,268 @@ +import React, { + createContext, + Dispatch, + useCallback, + useContext, + useReducer, +} from 'react'; + +import {sanitizeObjectId} from '../pages/wfReactInterface/traceServerDirectClient'; +import {parseCSV} from './csvUtils'; +import { + DatasetEditProvider, + useDatasetEditContext, +} from './DatasetEditorContext'; +import {DatasetObjectVal} from './EditableDatasetView'; + +// Action type constants +export const CREATE_DATASET_ACTIONS = { + SET_IS_OPEN: 'SET_IS_OPEN', + SET_DATASET_NAME: 'SET_DATASET_NAME', + SET_DATASET_DESCRIPTION: 'SET_DATASET_DESCRIPTION', + SET_PARSED_DATA: 'SET_PARSED_DATA', + SET_IS_LOADING: 'SET_IS_LOADING', + SET_ERROR: 'SET_ERROR', + SET_DRAWER_WIDTH: 'SET_DRAWER_WIDTH', + SET_IS_FULLSCREEN: 'SET_IS_FULLSCREEN', + RESET: 
'RESET', +} as const; + +// State interface +export interface CreateDatasetState { + isOpen: boolean; + datasetName: string; + parsedData: DatasetObjectVal | null; + isLoading: boolean; + error: string | null; + drawerWidth: number; + isFullscreen: boolean; +} + +// Action types +export type CreateDatasetAction = + | {type: typeof CREATE_DATASET_ACTIONS.SET_IS_OPEN; payload: boolean} + | {type: typeof CREATE_DATASET_ACTIONS.SET_DATASET_NAME; payload: string} + | { + type: typeof CREATE_DATASET_ACTIONS.SET_PARSED_DATA; + payload: DatasetObjectVal | null; + } + | {type: typeof CREATE_DATASET_ACTIONS.SET_IS_LOADING; payload: boolean} + | {type: typeof CREATE_DATASET_ACTIONS.SET_ERROR; payload: string | null} + | {type: typeof CREATE_DATASET_ACTIONS.SET_DRAWER_WIDTH; payload: number} + | {type: typeof CREATE_DATASET_ACTIONS.SET_IS_FULLSCREEN; payload: boolean} + | {type: typeof CREATE_DATASET_ACTIONS.RESET}; + +// Initial state +const initialState: CreateDatasetState = { + isOpen: false, + datasetName: '', + parsedData: null, + isLoading: false, + error: null, + drawerWidth: 800, + isFullscreen: false, +}; + +// Reducer function +function createDatasetReducer( + state: CreateDatasetState, + action: CreateDatasetAction +): CreateDatasetState { + switch (action.type) { + case CREATE_DATASET_ACTIONS.SET_IS_OPEN: + return {...state, isOpen: action.payload}; + case CREATE_DATASET_ACTIONS.SET_DATASET_NAME: + return {...state, datasetName: action.payload}; + case CREATE_DATASET_ACTIONS.SET_PARSED_DATA: + return {...state, parsedData: action.payload}; + case CREATE_DATASET_ACTIONS.SET_IS_LOADING: + return {...state, isLoading: action.payload}; + case CREATE_DATASET_ACTIONS.SET_ERROR: + return {...state, error: action.payload}; + case CREATE_DATASET_ACTIONS.SET_DRAWER_WIDTH: + return {...state, drawerWidth: action.payload}; + case CREATE_DATASET_ACTIONS.SET_IS_FULLSCREEN: + return {...state, isFullscreen: action.payload}; + case CREATE_DATASET_ACTIONS.RESET: + return 
initialState; + default: + return state; + } +} + +// Context interface +interface CreateDatasetContextType { + state: CreateDatasetState; + dispatch: Dispatch; + parseCSVFile: (file: File) => Promise; + handleCloseDrawer: () => void; + handlePublishDataset: () => void; + clearDataset: () => void; + editorContext: ReturnType; +} + +// Create the context +const CreateDatasetContext = createContext< + CreateDatasetContextType | undefined +>(undefined); + +// Provider component +export const CreateDatasetProvider: React.FC<{ + children: React.ReactNode; + onPublishDataset: (dataset: any) => void; +}> = ({children, onPublishDataset}) => { + return ( + + + {children} + + + ); +}; + +// Inner provider that has access to the editor context +const CreateDatasetProviderInner: React.FC<{ + children: React.ReactNode; + onPublishDataset: (dataset: any) => void; +}> = ({children, onPublishDataset}) => { + const [state, dispatch] = useReducer(createDatasetReducer, initialState); + const editorContext = useDatasetEditContext(); + + const parseCSVFile = useCallback( + async (file: File) => { + dispatch({type: CREATE_DATASET_ACTIONS.SET_IS_LOADING, payload: true}); + dispatch({type: CREATE_DATASET_ACTIONS.SET_ERROR, payload: null}); + + try { + // If filename is like "dataset.csv", extract "dataset" as default name + const fileName = file.name.split('.').slice(0, -1).join('.'); + if (fileName) { + const sanitizedName = sanitizeObjectId(fileName); + dispatch({ + type: CREATE_DATASET_ACTIONS.SET_DATASET_NAME, + payload: sanitizedName, + }); + } + + const result = await parseCSV(file); + + if (result.errors.length > 0) { + const errorMessage = result.errors + .map(err => `Row ${err.row}: ${err.message}`) + .join('\n'); + dispatch({ + type: CREATE_DATASET_ACTIONS.SET_ERROR, + payload: `CSV parsing errors:\n${errorMessage}`, + }); + return; + } + + // Transform the data into the format expected by EditableDatasetView + // The data has already been cast to the appropriate types by 
parseCSV + const transformedRows = result.data.map((row, index) => ({ + ...row, + ___weave: { + id: `row-${index}`, + index, + isNew: true, + }, + })); + + // Create a Map of the transformed rows for the editor context + const rowsMap = new Map( + transformedRows.map(row => [row.___weave.id, row]) + ); + + const transformedData: DatasetObjectVal = { + _type: 'Dataset', + name: state.datasetName || null, + description: null, + rows: JSON.stringify(transformedRows), + _class_name: 'Dataset', + _bases: ['Object', 'BaseModel'], + }; + + dispatch({ + type: CREATE_DATASET_ACTIONS.SET_PARSED_DATA, + payload: transformedData, + }); + + // Initialize the editor context with the transformed rows + editorContext.setEditedRows(new Map()); + editorContext.setDeletedRows([]); + editorContext.setAddedRows(rowsMap); + } catch (error) { + dispatch({ + type: CREATE_DATASET_ACTIONS.SET_ERROR, + payload: + error instanceof Error ? error.message : 'Failed to parse CSV file', + }); + } finally { + dispatch({type: CREATE_DATASET_ACTIONS.SET_IS_LOADING, payload: false}); + } + }, + [dispatch, state.datasetName, editorContext] + ); + + // Handle drawer close + const handleCloseDrawer = useCallback(() => { + dispatch({type: CREATE_DATASET_ACTIONS.SET_IS_OPEN, payload: false}); + dispatch({type: CREATE_DATASET_ACTIONS.SET_PARSED_DATA, payload: null}); + editorContext.resetEditState(); + }, [editorContext]); + + // Handle publish dataset + const handlePublishDataset = useCallback(() => { + if (state.parsedData) { + // Get the updated rows from the editor context + const rows = editorContext.getRowsNoMeta(); + + // Update the dataset with the edited rows and mark it for publication + const updatedDataset = { + ...state.parsedData, + name: state.datasetName, + rows: JSON.stringify(rows), + publishNow: true, + }; + + // Call the onSaveDataset callback with the publish flag + onPublishDataset(updatedDataset); + + // Reset the state + dispatch({type: CREATE_DATASET_ACTIONS.RESET}); + 
editorContext.resetEditState(); + } + }, [state.parsedData, state.datasetName, editorContext, onPublishDataset]); + + // Handle clear dataset + const clearDataset = useCallback(() => { + dispatch({type: CREATE_DATASET_ACTIONS.SET_PARSED_DATA, payload: null}); + editorContext.resetEditState(); + }, [editorContext]); + + return ( + + {children} + + ); +}; + +// Hook for using the context +export const useCreateDatasetContext = () => { + const context = useContext(CreateDatasetContext); + if (context === undefined) { + throw new Error( + 'useCreateDatasetContext must be used within a CreateDatasetProvider' + ); + } + return context; +}; diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/DatasetPublishToast.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/DatasetPublishToast.tsx index 85ad03131472..f9ed8112efbf 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/DatasetPublishToast.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/DatasetPublishToast.tsx @@ -21,11 +21,25 @@ export const DatasetPublishToast: React.FC = ({ url, message, }) => ( - + + sx={{ + color: 'white', + fontSize: '16px', + fontFamily: 'Source Sans Pro', + whiteSpace: 'nowrap', + overflow: 'hidden', + textOverflow: 'ellipsis', + }}> {message} diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/EditableDatasetView.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/EditableDatasetView.tsx index d2384be83908..4881ff2f1252 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/EditableDatasetView.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/EditableDatasetView.tsx @@ -46,7 +46,7 @@ import {useDatasetEditContext} from './DatasetEditorContext'; const ADDED_ROW_ID_PREFIX = 'new-'; // Dataset object schema as it is stored in the database. 
-interface DatasetObjectVal { +export interface DatasetObjectVal { _type: 'Dataset'; name: string | null; description: string | null; @@ -60,6 +60,8 @@ export interface EditableDatasetViewProps { isEditing?: boolean; hideRemoveForAddedRows?: boolean; showAddRowButton?: boolean; + hideIdColumn?: boolean; + disableNewRowHighlight?: boolean; } interface OrderedRow { @@ -72,6 +74,8 @@ export const EditableDatasetView: React.FC = ({ isEditing = false, hideRemoveForAddedRows = false, showAddRowButton = true, + hideIdColumn = false, + disableNewRowHighlight = false, }) => { const {useTableRowsQuery, useTableQueryStats} = useWFHooks(); const [sortBy, setSortBy] = useState([]); @@ -334,8 +338,12 @@ export const EditableDatasetView: React.FC = ({ setInitialFields(Array.from(allFields)); } - const baseColumns: GridColDef[] = [ - { + // Create an array to hold all base columns + const baseColumns: GridColDef[] = []; + + // Add ID column only if not hidden + if (!hideIdColumn) { + baseColumns.push({ field: '_row_click', headerName: 'id', sortable: false, @@ -368,7 +376,7 @@ export const EditableDatasetView: React.FC = ({ params.row.___weave?.index ) ? CELL_COLORS.DELETED - : params.row.___weave?.isNew + : params.row.___weave?.isNew && !disableNewRowHighlight ? CELL_COLORS.NEW : CELL_COLORS.TRANSPARENT, }}> @@ -378,31 +386,32 @@ export const EditableDatasetView: React.FC = ({ ); }, - }, - ...(isEditing - ? [ - { - field: 'controls', - headerName: '', - width: columnWidths.controls ?? 48, - sortable: false, - filterable: false, - editable: false, - renderCell: (params: GridRenderCellParams) => ( - - ), - }, - ] - : []), - ]; + }); + } + + // Add control column if editing is enabled, regardless of hideIdColumn setting + if (isEditing) { + baseColumns.push({ + field: 'controls', + headerName: '', + width: columnWidths.controls ?? 
48, + sortable: false, + filterable: false, + editable: false, + renderCell: (params: GridRenderCellParams) => ( + + ), + }); + } const fieldColumns: GridColDef[] = Array.from(allFields).map(field => ({ field: field as string, @@ -442,6 +451,7 @@ export const EditableDatasetView: React.FC = ({ loadedRows[rowIndex - offset]?.val ?? {}, field as string )} + disableNewRowHighlight={disableNewRowHighlight} /> ); }, @@ -477,6 +487,8 @@ export const EditableDatasetView: React.FC = ({ preserveFieldOrder, hideRemoveForAddedRows, isFieldEdited, + hideIdColumn, + disableNewRowHighlight, ]); const handleColumnWidthChange = useCallback((params: any) => { diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/SelectDatasetStep.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/SelectDatasetStep.tsx index d34f566412be..aee5280a3044 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/SelectDatasetStep.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/SelectDatasetStep.tsx @@ -12,6 +12,7 @@ import {SmallRef} from '../smallRef/SmallRef'; import {DataPreviewTooltip} from './DataPreviewTooltip'; import {ACTION_TYPES, useDatasetDrawer} from './DatasetDrawerContext'; import {useDatasetEditContext} from './DatasetEditorContext'; +import {validateDatasetName} from './datasetNameValidation'; const typographyStyle = {fontFamily: 'Source Sans Pro'}; @@ -199,41 +200,9 @@ export const SelectDatasetStep: React.FC = ({ const handleNameChange = (value: string) => { setNewDatasetName(value); - if (!value.trim()) { - setError(null); - onValidationChange(false); - return; - } - - try { - // First check if it starts with a letter or number - if (!/^[a-zA-Z0-9]/.test(value)) { - setError('Dataset name must start with a letter or number'); - onValidationChange(false); - return; - } - - // Then check if it only contains allowed characters - if (!/^[a-zA-Z0-9\-_]+$/.test(value)) { - const invalidChars = [ - 
...new Set( - value - .split('') - .filter(c => !/[a-zA-Z0-9\-_]/.test(c)) - .map(c => (c === ' ' ? '' : c)) - ), - ].join(', '); - setError(`Invalid characters found: ${invalidChars}`); - onValidationChange(false); - return; - } - - setError(null); - onValidationChange(true); - } catch (e) { - setError(e instanceof Error ? e.message : 'Invalid dataset name'); - onValidationChange(false); - } + const validationResult = validateDatasetName(value); + setError(validationResult.error); + onValidationChange(validationResult.isValid); }; const filteredDatasets = useMemo(() => {
diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/__tests__/csvUtils.test.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/__tests__/csvUtils.test.ts
new file mode 100644
index 000000000000..f745aec91ce4
--- /dev/null
+++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/__tests__/csvUtils.test.ts
@@ -0,0 +1,203 @@
+import {
+  analyzeColumns,
+  castDataWithColumnTypes,
+  castValueToType,
+  detectDataType,
+  parseCSV,
+} from '../csvUtils';
+
+describe('detectDataType', () => {
+  test('detects null values', () => {
+    expect(detectDataType(null)).toBe('null');
+    expect(detectDataType(undefined)).toBe('null');
+    expect(detectDataType('')).toBe('null');
+  });
+
+  test('detects number values', () => {
+    expect(detectDataType('123')).toBe('number');
+    expect(detectDataType('-123.45')).toBe('number');
+    expect(detectDataType('0')).toBe('number');
+    expect(detectDataType(123)).toBe('number');
+  });
+
+  test('detects boolean values', () => {
+    expect(detectDataType('true')).toBe('boolean');
+    expect(detectDataType('false')).toBe('boolean');
+    expect(detectDataType('TRUE')).toBe('boolean');
+    expect(detectDataType('FALSE')).toBe('boolean');
+  });
+
+  test('detects date values', () => {
+    expect(detectDataType('2024-03-15')).toBe('date');
+    expect(detectDataType('03/15/2024')).toBe('date');
+    expect(detectDataType('2024-03-15T10:30:00')).toBe('date');
+  });
+
+  test('detects string values', () => {
+    expect(detectDataType('hello')).toBe('string');
+    expect(detectDataType('123abc')).toBe('string');
+    expect(detectDataType('not-a-date')).toBe('string');
+  });
+
+  test('handles edge cases', () => {
+    expect(detectDataType('NaN')).toBe('string');
+    expect(detectDataType('Infinity')).toBe('string');
+    expect(detectDataType('null')).toBe('string');
+    expect(detectDataType('undefined')).toBe('string');
+  });
+});
+
+describe('analyzeColumns', () => {
+  test('handles empty data', () => {
+    expect(analyzeColumns([])).toEqual([]);
+  });
+
+  test('analyzes simple columns', () => {
+    const data = [
+      {name: 'John', age: '25', active: 'true'},
+      {name: 'Jane', age: '30', active: 'false'},
+    ];
+
+    const result = analyzeColumns(data);
+    expect(result).toEqual([
+      {name: 'name', type: 'string', sample: 'John'},
+      {name: 'age', type: 'number', sample: '25'},
+      {name: 'active', type: 'boolean', sample: 'true'},
+    ]);
+  });
+
+  test('handles mixed types in columns', () => {
+    const data = [{value: '123'}, {value: 'abc'}, {value: '456'}];
+
+    const result = analyzeColumns(data);
+    expect(result).toEqual([
+      {name: 'value', type: 'string', sample: '123'}, // Falls back to string
+    ]);
+  });
+
+  test('handles columns with all null values', () => {
+    const data = [{empty: ''}, {empty: null}, {empty: undefined}];
+
+    const result = analyzeColumns(data);
+    expect(result).toEqual([{name: 'empty', type: 'null', sample: null}]);
+  });
+
+  test('handles date columns', () => {
+    const data = [
+      {date: '2024-03-15'},
+      {date: '2024-03-16'},
+      {date: '2024-03-17'},
+    ];
+
+    const result = analyzeColumns(data);
+    expect(result).toEqual([
+      {name: 'date', type: 'date', sample: '2024-03-15'},
+    ]);
+  });
+});
+
+describe('castValueToType', () => {
+  test('maps empty values to null regardless of type', () => {
+    expect(castValueToType('', 'string')).toBeNull();
+    expect(castValueToType(null, 'number')).toBeNull();
+    expect(castValueToType(undefined, 'boolean')).toBeNull();
+  });
+
+  test('casts numbers and booleans', () => {
+    expect(castValueToType('25', 'number')).toBe(25);
+    expect(castValueToType('Yes', 'boolean')).toBe(true);
+    expect(castValueToType('0', 'boolean')).toBe(false);
+  });
+
+  test('leaves values that cannot be cast untouched', () => {
+    expect(castValueToType('abc', 'number')).toBe('abc');
+    expect(castValueToType('maybe', 'boolean')).toBe('maybe');
+  });
+});
+
+describe('castDataWithColumnTypes', () => {
+  test('casts every cell using its column type', () => {
+    const rows = [{age: '25', city: ''}];
+    const columns = analyzeColumns(rows);
+    expect(castDataWithColumnTypes(rows, columns)).toEqual([
+      {age: 25, city: null},
+    ]);
+  });
+});
+
+describe('parseCSV', () => {
+  // Helper function to create a File object from a string
+  const createCSVFile = (content: string): File => {
+    return new File([content], 'test.csv', {type: 'text/csv'});
+  };
+
+  test('parses basic CSV with headers', async () => {
+    const csv = 'name,age\nJohn,25\nJane,30';
+    const file = createCSVFile(csv);
+
+    const result = await parseCSV(file);
+    // Rows are cast to their detected column types; samples stay raw strings.
+    expect(result.data).toEqual([
+      {name: 'John', age: 25},
+      {name: 'Jane', age: 30},
+    ]);
+    expect(result.errors).toEqual([]);
+    expect(result.meta.columns).toEqual([
+      {name: 'name', type: 'string', sample: 'John'},
+      {name: 'age', type: 'number', sample: '25'},
+    ]);
+  });
+
+  test('handles CSV with mixed data types', async () => {
+    const csv = 'col1,col2,col3\n123,true,2024-03-15\nabc,false,2024-03-16';
+    const file = createCSVFile(csv);
+
+    const result = await parseCSV(file);
+    expect(result.meta.columns).toEqual([
+      {name: 'col1', type: 'string', sample: '123'}, // Mixed types
+      {name: 'col2', type: 'boolean', sample: 'true'},
+      {name: 'col3', type: 'date', sample: '2024-03-15'},
+    ]);
+  });
+
+  test('handles empty CSV', async () => {
+    const csv = 'col1,col2\n';
+    const file = createCSVFile(csv);
+
+    const result = await parseCSV(file);
+    expect(result.data).toEqual([]);
+    expect(result.meta.totalRows).toBe(0);
+  });
+
+  test('handles CSV with missing values', async () => {
+    const csv = 'name,age,city\nJohn,25,\nJane,,New York';
+    const file = createCSVFile(csv);
+
+    const result = await parseCSV(file);
+    // Missing cells are cast to null, whatever the column type.
+    expect(result.data).toEqual([
+      {name: 'John', age: 25, city: null},
+      {name: 'Jane', age: null, city: 'New York'},
+    ]);
+  });
+
+  test('handles CSV with different delimiters', async () => {
+    const csv = 'name;age;city\nJohn;25;New York\nJane;30;Boston';
+    const file = createCSVFile(csv);
+
+    const result = await parseCSV(file);
+    expect(result.meta.delimiter).toBe(';');
+    expect(result.data).toEqual([
+      {name: 'John', age: 25, city: 'New York'},
+      {name: 'Jane', age: 30, city: 'Boston'},
+    ]);
+  });
+
+  test('handles malformed CSV', async () => {
+    const csv = 'col1,col2\n1,2,3\n4,5';
+    const file = createCSVFile(csv);
+
+    const result = await parseCSV(file);
+    expect(result.errors.length).toBeGreaterThan(0);
+  });
+});
diff --git 
a/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/csvUtils.ts b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/csvUtils.ts
new file mode 100644
index 000000000000..e3ae84ce6a0c
--- /dev/null
+++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/datasets/csvUtils.ts
@@ -0,0 +1,269 @@
+import Papa, {ParseError, ParseResult as PapaParseResult} from 'papaparse';
+
+/** Inferred description of a single CSV column. */
+export interface ParsedColumn {
+  name: string;
+  type: 'string' | 'number' | 'boolean' | 'date' | 'null';
+  /** First non-empty raw value seen in the column, or `null` if there is none. */
+  sample: any;
+}
+
+/** Outcome of `parseCSV`: typed rows, parser errors and file metadata. */
+export interface ParseResult {
+  data: any[];
+  errors: Array<{message: string; row?: number}>;
+  meta: {
+    delimiter: string;
+    linebreak: string;
+    columns: ParsedColumn[];
+    totalRows: number;
+    encoding: string;
+  };
+}
+
+/** Number of leading rows inspected when inferring a column type. */
+const TYPE_SAMPLE_SIZE = 100;
+
+/** Lower-cased spellings accepted as `true` / `false` in boolean columns. */
+const TRUE_TOKENS = ['true', 'yes', 'y', '1'];
+const FALSE_TOKENS = ['false', 'no', 'n', '0'];
+
+/** Returns true for values treated as missing: `null`, `undefined` and `''`. */
+const isEmptyValue = (value: any): boolean =>
+  value === null || value === undefined || value === '';
+
+/** Returns true when a value is one of the accepted boolean spellings. */
+const isBooleanLike = (value: any): boolean => {
+  if (typeof value === 'boolean') {
+    return true;
+  }
+  if (typeof value === 'string') {
+    const normalized = value.toLowerCase().trim();
+    return (
+      TRUE_TOKENS.includes(normalized) || FALSE_TOKENS.includes(normalized)
+    );
+  }
+  if (typeof value === 'number') {
+    return value === 0 || value === 1;
+  }
+  return false;
+};
+
+/**
+ * Infers the type of one raw cell value.
+ *
+ * Empty values are `'null'`. `'Infinity'` and `'-Infinity'` stay strings,
+ * `true`/`false` in any letter case are booleans, and anything `Number()`
+ * accepts is a number. A string is a date only when `Date` can parse it and it
+ * contains `-`, `/` or a run of at least eight digits.
+ */
+export const detectDataType = (value: any): ParsedColumn['type'] => {
+  if (isEmptyValue(value)) {
+    return 'null';
+  }
+  if (typeof value === 'boolean') {
+    return 'boolean';
+  }
+
+  if (typeof value === 'string') {
+    // `Number('Infinity')` is not NaN, so rule these out before the number check.
+    if (value === 'Infinity' || value === '-Infinity') {
+      return 'string';
+    }
+    const lowered = value.toLowerCase();
+    if (lowered === 'true' || lowered === 'false') {
+      return 'boolean';
+    }
+  }
+
+  if (!isNaN(Number(value)) && value.toString().trim() !== '') {
+    return 'number';
+  }
+
+  if (typeof value === 'string') {
+    const date = new Date(value);
+    // Require date-like punctuation as well, to limit false positives from
+    // lenient `Date` parsing.
+    const looksLikeDate =
+      value.includes('-') || value.includes('/') || /\d{8,}/.test(value);
+    if (!isNaN(date.getTime()) && looksLikeDate) {
+      return 'date';
+    }
+  }
+
+  return 'string';
+};
+
+/**
+ * Infers a `ParsedColumn` for every key of the first row.
+ *
+ * Only the first `TYPE_SAMPLE_SIZE` rows are inspected and empty cells are
+ * ignored. A column is `'null'` when it has no values, `'boolean'` when every
+ * value is a boolean spelling, the shared detected type when all values agree,
+ * and `'string'` otherwise.
+ */
+export const analyzeColumns = (data: any[]): ParsedColumn[] => {
+  if (data.length === 0) {
+    return [];
+  }
+
+  const sampledRows = data.slice(0, TYPE_SAMPLE_SIZE);
+
+  return Object.keys(data[0]).map((name): ParsedColumn => {
+    const values = sampledRows
+      .map(row => row[name])
+      .filter(value => !isEmptyValue(value));
+
+    if (values.length === 0) {
+      return {name, type: 'null', sample: null};
+    }
+
+    const sample = values[0];
+    if (values.every(isBooleanLike)) {
+      return {name, type: 'boolean', sample};
+    }
+
+    const initialType = detectDataType(sample);
+    const allSameType = values.every(
+      value => detectDataType(value) === initialType
+    );
+    // Fall back to string when the column mixes types.
+    return {name, type: allSameType ? initialType : 'string', sample};
+  });
+};
+
+/**
+ * Casts one raw value to the given column type.
+ *
+ * Empty values become `null`. A value that cannot be converted to the
+ * requested type is returned unchanged.
+ */
+export const castValueToType = (
+  value: any,
+  type: ParsedColumn['type']
+): any => {
+  if (isEmptyValue(value)) {
+    return null;
+  }
+
+  // Each case has its own block so its declarations stay scoped to it.
+  switch (type) {
+    case 'number': {
+      // `Number('  ')` is 0; do not turn blank text into a zero.
+      if (typeof value === 'string' && value.trim() === '') {
+        return value;
+      }
+      const num = Number(value);
+      return isNaN(num) ? value : num;
+    }
+    case 'boolean': {
+      if (typeof value === 'string') {
+        const lowered = value.toLowerCase().trim();
+        if (TRUE_TOKENS.includes(lowered)) {
+          return true;
+        }
+        if (FALSE_TOKENS.includes(lowered)) {
+          return false;
+        }
+      } else if (value === 1) {
+        return true;
+      } else if (value === 0) {
+        return false;
+      }
+      return value;
+    }
+    case 'date': {
+      const date = new Date(value);
+      return isNaN(date.getTime()) ? value : date;
+    }
+    case 'null':
+      return null;
+    case 'string':
+    default:
+      return String(value);
+  }
+};
+
+/**
+ * Casts every cell of every row according to its column's inferred type.
+ * Keys without a matching column are cast as strings. When either input is
+ * empty, `data` is returned as-is.
+ */
+export const castDataWithColumnTypes = (
+  data: any[],
+  columns: ParsedColumn[]
+): any[] => {
+  if (data.length === 0 || columns.length === 0) {
+    return data;
+  }
+
+  const columnTypes = new Map<string, ParsedColumn['type']>();
+  columns.forEach(col => columnTypes.set(col.name, col.type));
+
+  return data.map(row => {
+    const castedRow: Record<string, any> = {};
+    Object.keys(row).forEach(key => {
+      const type = columnTypes.get(key) ?? 'string';
+      castedRow[key] = castValueToType(row[key], type);
+    });
+    return castedRow;
+  });
+};
+
+/**
+ * Reads a CSV file, parses it with a header row and casts each cell to its
+ * column's inferred type.
+ *
+ * Parser-level problems are reported in `errors` on the resolved value; the
+ * promise rejects only when the file cannot be read or processing throws.
+ */
+export const parseCSV = async (file: File): Promise<ParseResult> => {
+  return new Promise<ParseResult>((resolve, reject) => {
+    const reader = new FileReader();
+
+    reader.onload = e => {
+      // An exception thrown in this callback would otherwise leave the promise
+      // pending forever, so convert it into a rejection.
+      try {
+        const buffer = e.target?.result as ArrayBuffer;
+        const text = new TextDecoder().decode(buffer);
+
+        Papa.parse(text, {
+          header: true,
+          skipEmptyLines: true,
+          encoding: 'UTF-8',
+          complete: (results: PapaParseResult<any>) => {
+            const columns = analyzeColumns(results.data);
+            const castedData = castDataWithColumnTypes(results.data, columns);
+
+            resolve({
+              data: castedData,
+              errors: results.errors.map((err: ParseError) => ({
+                message: err.message,
+                row: err.row,
+              })),
+              meta: {
+                delimiter: results.meta.delimiter,
+                linebreak: results.meta.linebreak || '\n',
+                columns,
+                totalRows: results.data.length,
+                encoding: 'UTF-8',
+              },
+            });
+          },
+          error: (error: Error) => {
+            reject(new Error(`Failed to parse CSV: ${error.message}`));
+          },
+        });
+      } catch (err) {
+        reject(err instanceof Error ? err : new Error('Failed to parse CSV'));
+      }
+    };
+
+    reader.onerror = () => {
+      reject(new Error('Failed to read file'));
+    };
+
+    reader.readAsArrayBuffer(file);
+  });
+};
diff --git 
/** Outcome of validating a dataset name. */
export interface DatasetNameValidationResult {
  /** True only when the name passes every check. */
  isValid: boolean;
  /** Message to show to the user; `null` when valid or when the name is blank. */
  error: string | null;
}

/**
 * Validate a dataset name.
 *
 * A valid name starts with a letter or digit and contains only letters,
 * digits, `-` and `_`.
 *
 * @param value The candidate name.
 * @returns `{isValid: true, error: null}` for a valid name. A blank name
 *   (empty or whitespace only) is invalid but carries no error message.
 *   Otherwise `error` describes the first failed rule; the invalid-character
 *   message lists each offending character once, in order of appearance.
 */
export function validateDatasetName(
  value: string
): DatasetNameValidationResult {
  if (!value.trim()) {
    return {isValid: false, error: null};
  }

  // Rule 1: the first character must be a letter or a digit.
  if (!/^[a-zA-Z0-9]/.test(value)) {
    return {
      isValid: false,
      error: 'Dataset name must start with a letter or number',
    };
  }

  // Rule 2: only letters, digits, '-' and '_' are allowed.
  // Array.from walks code points, so a character outside the BMP (an emoji,
  // for instance) is reported whole instead of as two lone surrogates.
  const invalidChars = [
    ...new Set(Array.from(value).filter(c => !/^[a-zA-Z0-9\-_]$/.test(c))),
  ];
  if (invalidChars.length > 0) {
    // A bare space is invisible in the message, so spell it out.
    const listed = invalidChars
      .map(c => (c === ' ' ? '<space>' : c))
      .join(', ');
    return {
      isValid: false,
      error: `Invalid characters found: ${listed}`,
    };
  }

  return {isValid: true, error: null};
}
useState(false); const baseFilter = useMemo(() => { return { @@ -43,7 +57,6 @@ export const DatasetsPage: React.FC<{ props.onFilterUpdate ); - const {entity, project} = props; const [selectedVersions, setSelectedVersions] = useState([]); const onCompare = () => { @@ -57,6 +70,81 @@ export const DatasetsPage: React.FC<{ return 'Datasets'; }, [filter.objectName]); + const handleCreateDataset = () => { + setIsCreateDrawerOpen(true); + }; + + const handleCloseDrawer = () => { + setIsCreateDrawerOpen(false); + }; + + const handleSaveDataset = async (dataset: any) => { + // Log the dataset being saved for debugging purposes + console.log('Saving dataset:', dataset); + + // Check if this is a publish action + const isPublish = dataset.publishNow === true; + + setIsCreatingDataset(true); + try { + // Parse the rows from string back to array if they are provided as a string + const rows = + typeof dataset.rows === 'string' + ? JSON.parse(dataset.rows) + : dataset.rows; + + // Create the dataset using the actual API function + const result = await createNewDataset({ + projectId: `${entity}/${project}`, + entity, + project, + datasetName: dataset.name, + rows, + tableCreate, + objCreate, + router, + }); + + // If this is a publish action, we could add additional logic here + // This would require backend support for publishing datasets + if (isPublish) { + console.log('Publishing dataset:', dataset.name); + // Here you would call an API to mark the dataset as published + // For now, we'll just log and show different toast messaging + } + + // Show success message with link to the new dataset + toast( + , + { + position: 'top-right', + autoClose: 5000, + hideProgressBar: true, + closeOnClick: true, + pauseOnHover: true, + } + ); + } catch (error: any) { + console.error('Failed to create dataset:', error); + toast.error( + `Failed to ${isPublish ? 
'publish' : 'create'} dataset: ${ + error.message + }` + ); + } finally { + setIsCreatingDataset(false); + // Close the drawer + handleCloseDrawer(); + } + }; + if (loadingUserInfo) { return ; } @@ -64,44 +152,55 @@ export const DatasetsPage: React.FC<{ const filteredOnObject = filter.objectName != null; const hasComparison = filteredOnObject; const viewer = userInfo ? userInfo.id : null; - const isReadonly = !viewer || !userInfo?.teams.includes(props.entity); + const isReadonly = !viewer || !userInfo?.teams.includes(entity); const isAdmin = userInfo?.admin; const showDeleteButton = filteredOnObject && !isReadonly && isAdmin; return ( - - } - tabs={[ - { - label: '', - content: ( - - ), - }, - ]} - /> + <> + + } + tabs={[ + { + label: '', + content: ( + + ), + }, + ]} + /> + + + ); }; @@ -114,6 +213,8 @@ const DatasetsPageHeaderExtra: React.FC<{ showDeleteButton?: boolean; showCompareButton?: boolean; onCompare: () => void; + onCreateDataset: () => void; + isReadonly: boolean; }> = ({ entity, project, @@ -123,6 +224,8 @@ const DatasetsPageHeaderExtra: React.FC<{ showDeleteButton, showCompareButton, onCompare, + onCreateDataset, + isReadonly, }) => { const compareButton = showCompareButton ? 
( + )} {compareButton} {deleteButton} diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/common/ResizableDrawer.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/common/ResizableDrawer.tsx index f2c8e6a4599f..a579cafcd7f3 100644 --- a/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/common/ResizableDrawer.tsx +++ b/weave-js/src/components/PagePanelComponents/Home/Browse3/pages/common/ResizableDrawer.tsx @@ -118,6 +118,7 @@ export const ResizableDrawer: React.FC = ({ }} onMouseDown={handleMouseDown} /> + {headerContent} {children} ); diff --git a/weave-js/yarn.lock b/weave-js/yarn.lock index 2f4a0c9663a5..0d82963838d1 100644 --- a/weave-js/yarn.lock +++ b/weave-js/yarn.lock @@ -4565,6 +4565,13 @@ resolved "https://registry.yarnpkg.com/@types/pako/-/pako-2.0.0.tgz#12ab4c19107528452e73ac99132c875ccd43bdfb" integrity sha512-10+iaz93qR5WYxTo+PMifD5TSxiOtdRaxBf7INGGXMQgTCu8Z/7GYWYFUOS3q/G0nE5boj1r4FEB+WSy7s5gbA== +"@types/papaparse@^5.3.15": + version "5.3.15" + resolved "https://registry.yarnpkg.com/@types/papaparse/-/papaparse-5.3.15.tgz#7cafa16757a1d121422deefbb10b6310b224ecc4" + integrity sha512-JHe6vF6x/8Z85nCX4yFdDslN11d+1pr12E526X8WAfhadOeaOTx5AuIkvDKIBopfvlzpzkdMx4YyvSKCM9oqtw== + dependencies: + "@types/node" "*" + "@types/parse-json@^4.0.0": version "4.0.0" resolved "https://registry.yarnpkg.com/@types/parse-json/-/parse-json-4.0.0.tgz#2f8bb441434d163b35fb8ffdccd7138927ffb8c0" @@ -11678,6 +11685,11 @@ pako@^2.1.0: resolved "https://registry.yarnpkg.com/pako/-/pako-2.1.0.tgz#266cc37f98c7d883545d11335c00fbd4062c9a86" integrity sha512-w+eufiZ1WuJYgPXbV/PO3NCMEc3xqylkKHzp8bxp1uW4qaSNQUkwmLLEc3kKsfz8lpV1F8Ht3U1Cm+9Srog2ug== +papaparse@^5.5.2: + version "5.5.2" + resolved "https://registry.yarnpkg.com/papaparse/-/papaparse-5.5.2.tgz#fb67cc5a03ba8930cb435dc4641a25d6804bd4d7" + integrity sha512-PZXg8UuAc4PcVwLosEEDYjPyfWnTEhOrUfdv+3Bx+NuAb+5NhDmXzg5fHWmdCh1mP5p7JAZfFr3IMQfcntNAdA== + param-case@^3.0.4: version 
"3.0.4" resolved "https://registry.yarnpkg.com/param-case/-/param-case-3.0.4.tgz#7d17fe4aa12bde34d4a77d91acfb6219caad01c5"