diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c18dd8d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__/
diff --git a/test_remote_vector_index_builder/test_placeholder.py b/remote_vector_index_builder/core/common/__init__.py
similarity index 82%
rename from test_remote_vector_index_builder/test_placeholder.py
rename to remote_vector_index_builder/core/common/__init__.py
index 2a99f7f..fe22b86 100644
--- a/test_remote_vector_index_builder/test_placeholder.py
+++ b/remote_vector_index_builder/core/common/__init__.py
@@ -4,7 +4,3 @@
 # The OpenSearch Contributors require contributions made to
 # this file be licensed under the Apache-2.0 license or a
 # compatible open source license.
-
-
-def test_placeholder():
-    assert 1 + 1 == 2
diff --git a/remote_vector_index_builder/core/common/exceptions.py b/remote_vector_index_builder/core/common/exceptions.py
new file mode 100644
index 0000000..0e5cf5c
--- /dev/null
+++ b/remote_vector_index_builder/core/common/exceptions.py
@@ -0,0 +1,36 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+
+"""
+Expose public exceptions & warnings
+"""
+
+
+class BlobError(Exception):
+    """Generic error raised when a blob is downloaded from or uploaded to the Object Store"""
+
+    def __init__(self, message: str):
+        super().__init__(message)
+
+
+class UnsupportedObjectStoreTypeError(ValueError):
+    """Error raised when creating an Object Store object with an unsupported repository type"""
+
+    pass
+
+
+class VectorsDatasetError(Exception):
+    """Generic error raised when converting a buffer into a Vector Dataset"""
+
+    def __init__(self, message: str):
+        super().__init__(message)
+
+
+class UnsupportedVectorsDataTypeError(ValueError):
+    """Error raised when creating a Vector Dataset because of an unsupported data type"""
+
+    pass
diff --git a/remote_vector_index_builder/core/common/models/__init__.py b/remote_vector_index_builder/core/common/models/__init__.py
new file mode 100644
index 0000000..fe22b86
--- /dev/null
+++ b/remote_vector_index_builder/core/common/models/__init__.py
@@ -0,0 +1,6 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
diff --git a/remote_vector_index_builder/core/common/models/index_build_parameters.py b/remote_vector_index_builder/core/common/models/index_build_parameters.py
new file mode 100644
index 0000000..e09f433
--- /dev/null
+++ b/remote_vector_index_builder/core/common/models/index_build_parameters.py
@@ -0,0 +1,151 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+from enum import Enum
+from typing import Annotated
+
+from core.object_store.types import ObjectStoreType
+from pydantic import BaseModel, ConfigDict, Field
+
+# Type annotation for vector file paths that must end with .knnvec
+VectorPathRegex = Annotated[str, Field(pattern=".+\\.knnvec$")]
+
+
+class DataType(str, Enum):
+    """Supported data types for vector values.
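+
+    Because the enum derives from str, members compare equal to their wire
+    strings and can be constructed from them; a quick doctest-style sketch:
+
+        >>> DataType("fp32") is DataType.FLOAT32
+        True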
+
+    Attributes:
+        FLOAT32: 32-bit floating point values
+        FLOAT16: 16-bit floating point values
+        BYTE: 8-bit integer values
+        BINARY: Binary data format
+    """
+
+    FLOAT32 = "fp32"
+    FLOAT16 = "fp16"
+    BYTE = "byte"
+    BINARY = "binary"
+
+
+class SpaceType(str, Enum):
+    """Distance method used for measuring vector similarities.
+
+    Attributes:
+        L2: Euclidean distance
+        COSINESIMIL: Cosine similarity
+        L1: Manhattan distance
+        LINF: Chebyshev distance
+        INNERPRODUCT: Dot product similarity
+        HAMMING: Hamming distance for binary vectors
+    """
+
+    L2 = "l2"
+    COSINESIMIL = "cosinesimil"
+    L1 = "l1"
+    LINF = "linf"
+    INNERPRODUCT = "innerproduct"
+    HAMMING = "hamming"
+
+
+class Algorithm(str, Enum):
+    """Supported algorithms for vector indexing.
+
+    Attributes:
+        HNSW: Hierarchical Navigable Small World graph
+    """
+
+    HNSW = "hnsw"
+
+
+class Engine(str, Enum):
+    """Available vector search engines.
+
+    Attributes:
+        FAISS: Facebook AI Similarity Search
+    """
+
+    FAISS = "faiss"
+
+
+class AlgorithmParameters(BaseModel):
+    """Configuration parameters for the HNSW algorithm.
+
+    Attributes:
+        ef_construction (int): Size of the dynamic candidate list for constructing
+            the HNSW graph. Higher values lead to better quality but slower
+            index construction. Defaults to 100.
+        m (int): Number of bi-directional links created for every new element
+            during construction. Higher values lead to better search speed but
+            more memory consumption. Defaults to 16.
+
+    Note:
+        The class is configured to allow extra attributes using the ConfigDict class.
+    """
+
+    ef_construction: int = 100
+    m: int = 16
+    model_config = ConfigDict(extra="allow")
+
+
+class IndexParameters(BaseModel):
+    """Configuration parameters for vector index construction.
+
+    This class defines the core index configuration including the algorithm type,
+    distance metric, and algorithm-specific parameters.
+
+    Attributes:
+        algorithm (Algorithm): The vector indexing algorithm to use.
+            Defaults to HNSW (Hierarchical Navigable Small World).
+        space_type (SpaceType): The distance metric to use for vector comparisons.
+            Defaults to L2 (Euclidean distance).
+        algorithm_parameters (AlgorithmParameters): Specific parameters for the chosen
+            algorithm. Defaults to standard HNSW parameters (ef_construction=100, m=16).
+    """
+
+    algorithm: Algorithm = Algorithm.HNSW
+    space_type: SpaceType = SpaceType.L2
+    algorithm_parameters: AlgorithmParameters = Field(
+        default_factory=AlgorithmParameters
+    )
+
+
+class IndexBuildParameters(BaseModel):
+    """Parameters required for building a vector index.
+
+    This class encapsulates all necessary parameters for constructing a vector index,
+    including data source information, vector specifications, and index configuration.
+
+    Attributes:
+        repository_type (ObjectStoreType): The type of repository where the vector data
+            is stored. Defaults to s3.
+        container_name (str): Name of the container (e.g., S3 bucket) containing the vector data.
+        vector_path (VectorPathRegex): Path to the vector data file. Must end with the .knnvec extension.
+        doc_id_path (str): Path to the document IDs corresponding to the vectors.
+        tenant_id (str): Optional identifier for multi-tenant scenarios. Defaults to an empty string.
+        dimension (int): The dimensionality of the vectors to be indexed. Must be greater than 0.
+        doc_count (int): Total number of documents/vectors to be indexed. Must be greater than 0.
+        data_type (DataType): The numerical format of the vector data.
+            Defaults to FLOAT32.
+        engine (Engine): The vector search engine to use for indexing.
+            Defaults to FAISS.
+        index_parameters (IndexParameters): Configuration for the index structure
+            and algorithm. Defaults to the standard HNSW configuration.
+
+    Note:
+        The class is configured to forbid extra attributes using the ConfigDict class,
+        ensuring strict parameter validation.
+    """
+
+    repository_type: ObjectStoreType = ObjectStoreType.S3
+    container_name: str
+    vector_path: VectorPathRegex
+    doc_id_path: str
+    tenant_id: str = ""
+    dimension: int = Field(gt=0)
+    doc_count: int = Field(gt=0)
+    data_type: DataType = DataType.FLOAT32
+    engine: Engine = Engine.FAISS
+    index_parameters: IndexParameters = Field(default_factory=IndexParameters)
+    model_config = ConfigDict(extra="forbid")
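A quick sketch of how these models validate input (the bucket and paths are
illustrative; extra="forbid" rejects unknown fields, and vector_path must
match the .knnvec pattern):

    from pydantic import ValidationError

    params = IndexBuildParameters(
        container_name="my-vector-bucket",        # hypothetical bucket
        vector_path="builds/abc123/vecs.knnvec",  # pattern-checked
        doc_id_path="builds/abc123/doc_ids.bin",
        dimension=768,
        doc_count=1_000_000,
    )
    assert params.index_parameters.algorithm_parameters.ef_construction == 100

    try:
        IndexBuildParameters(container_name="b", vector_path="vecs.txt",
                             doc_id_path="d", dimension=1, doc_count=1)
    except ValidationError:
        pass  # "vecs.txt" fails the .knnvec pattern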
diff --git a/remote_vector_index_builder/core/common/models/vectors_dataset.py b/remote_vector_index_builder/core/common/models/vectors_dataset.py
new file mode 100644
index 0000000..cc73d42
--- /dev/null
+++ b/remote_vector_index_builder/core/common/models/vectors_dataset.py
@@ -0,0 +1,119 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+
+from dataclasses import dataclass
+from io import BytesIO
+
+import numpy as np
+from core.common.exceptions import UnsupportedVectorsDataTypeError, VectorsDatasetError
+from core.common.models.index_build_parameters import DataType
+
+
+@dataclass
+class VectorsDataset:
+    """A class for handling vector datasets and their associated document IDs.
+
+    This class provides functionality to parse, validate, and store vector data along with
+    their corresponding document IDs. It supports multiple data types including FLOAT32,
+    FLOAT16, BYTE, and BINARY formats.
+
+    Attributes:
+        vectors (numpy.ndarray): The array of vectors, where each row represents a vector.
+        doc_ids (numpy.ndarray): Array of document IDs corresponding to the vectors.
+    """
+
+    vectors: np.ndarray
+    doc_ids: np.ndarray
+
+    def free_vectors_space(self):
+        """Free up memory by deleting the vectors and document IDs arrays."""
+        del self.vectors
+        del self.doc_ids
+
+    @staticmethod
+    def get_numpy_dtype(dtype: DataType):
+        """Convert DataType enum to numpy dtype string.
+
+        Args:
+            dtype (DataType): The data type enum value to convert.
+
+        Returns:
+            str: The corresponding numpy dtype string.
+
+        Raises:
+            UnsupportedVectorsDataTypeError: If the provided data type is not supported.
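+
+        Example:
+            A doctest-style sketch, assuming the little-endian numpy type
+            codes used in the method body (e.g. "<f4" for 32-bit floats):
+
+                >>> VectorsDataset.get_numpy_dtype(DataType.FLOAT32)
+                '<f4'
+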
+ """ + if dtype == DataType.FLOAT32: + return " None: + """ + Downloads the blob from the remote_store_path, to a buffer in memory + + Args: + remote_store_path (str): The path/key to the remote object to be downloaded + bytes_buffer (BytesIO): A bytes buffer where the downloaded data will be stored + + Returns: + None + + Note: + - The bytes_buffer should be properly initialized before passing to this method + - Caller is also responsible for cleaning up the bytes buffer + - Implementations should handle any necessary authentication and error handling + """ + pass + + @abstractmethod + def write_blob(self, local_file_path: str, remote_store_path: str) -> None: + """ + Uploads the blob at local_file_path to the remote_store_path + + Args: + local_file_path (str): Path to the local file that needs to be uploaded + remote_store_path (str): The path/key where the file should be stored in remote storage + + Returns: + None + + Note: + - Implementations should handle any necessary authentication and error handling + - The local file must exist and be readable + - The remote path should be valid for the specific storage implementation + """ + pass diff --git a/remote_vector_index_builder/core/object_store/object_store_factory.py b/remote_vector_index_builder/core/object_store/object_store_factory.py new file mode 100644 index 0000000..69bb44b --- /dev/null +++ b/remote_vector_index_builder/core/object_store/object_store_factory.py @@ -0,0 +1,56 @@ +# Copyright OpenSearch Contributors +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. + +from typing import Any, Dict + +from core.common.exceptions import UnsupportedObjectStoreTypeError +from core.common.models.index_build_parameters import IndexBuildParameters +from core.object_store.object_store import ObjectStore +from core.object_store.s3.s3_object_store import S3ObjectStore +from core.object_store.types import ObjectStoreType + + +class ObjectStoreFactory: + """ + A factory class for creating object store instances. + + This class provides a static method to create appropriate object store instances + based on the repository type specified in the index build parameters. It serves + as a central point for object store instance creation and helps maintain loose + coupling between different object store implementations. + """ + + @staticmethod + def create_object_store( + index_build_params: IndexBuildParameters, object_store_config: Dict[str, Any] + ) -> ObjectStore: + """ + Creates and returns an appropriate object store instance based on the repository type. + + Args: + index_build_params (IndexBuildParameters): Parameters for index building, including + the repository type that determines which object store implementation to use. + object_store_config (Dict[str, Any]): Configuration dictionary containing settings + specific to the object store implementation. + + Returns: + ObjectStore: An instance of the appropriate object store implementation. + + Raises: + UnsupportedObjectStoreTypeError: If the specified repository type is not supported. 
+
+        Example:
+            params = IndexBuildParameters(repository_type=ObjectStoreType.S3)
+            config = {"region": "us-west-2"}
+            store = ObjectStoreFactory.create_object_store(params, config)
+        """
+        if index_build_params.repository_type == ObjectStoreType.S3:
+            return S3ObjectStore(index_build_params, object_store_config)
+        else:
+            raise UnsupportedObjectStoreTypeError(
+                f"Unknown object store type: {index_build_params.repository_type}"
+            )
diff --git a/remote_vector_index_builder/core/object_store/s3/__init__.py b/remote_vector_index_builder/core/object_store/s3/__init__.py
new file mode 100644
index 0000000..fe22b86
--- /dev/null
+++ b/remote_vector_index_builder/core/object_store/s3/__init__.py
@@ -0,0 +1,6 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
diff --git a/remote_vector_index_builder/core/object_store/s3/s3_object_store.py b/remote_vector_index_builder/core/object_store/s3/s3_object_store.py
new file mode 100644
index 0000000..3e5835a
--- /dev/null
+++ b/remote_vector_index_builder/core/object_store/s3/s3_object_store.py
@@ -0,0 +1,225 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+
+import logging
+import os
+import threading
+from functools import cache
+from io import BytesIO
+from typing import Any, Dict
+
+import boto3
+from boto3.s3.transfer import TransferConfig
+from botocore.config import Config
+from botocore.exceptions import ClientError
+from core.common.exceptions import BlobError
+from core.common.models.index_build_parameters import IndexBuildParameters
+from core.object_store.object_store import ObjectStore
+
+logger = logging.getLogger(__name__)
+
+
+@cache
+def get_boto3_client(region: str, retries: int) -> boto3.client:
+    """Create or retrieve a cached boto3 S3 client.
+
+    Args:
+        region (str): AWS region name for the S3 client
+        retries (int): Maximum number of retry attempts for failed requests
+
+    Returns:
+        boto3.client: Configured S3 client instance
+    """
+    config = Config(retries={"max_attempts": retries})
+    return boto3.client("s3", config=config, region_name=region)
+
+
+class S3ObjectStore(ObjectStore):
+    """S3 implementation of the ObjectStore interface for managing vector data files.
+
+    This class handles interactions with AWS S3, including file uploads and downloads,
+    with configurable retry logic and transfer settings for optimal performance.
+
+    Attributes:
+        DEFAULT_TRANSFER_CONFIG (dict): Default configuration for S3 file transfers,
+            including chunk sizes, concurrency, and retry settings
+
+    Args:
+        index_build_params (IndexBuildParameters): Parameters for the index building process
+        object_store_config (Dict[str, Any]): Configuration options for S3 interactions
+    """
+
+    DEFAULT_TRANSFER_CONFIG = {
+        "multipart_chunksize": 10 * 1024 * 1024,  # 10MB
+        "max_concurrency": (os.cpu_count() or 2)
+        // 2,  # os.cpu_count() can be None, according to mypy.
+        # If it is None, then default to 1 thread
+        "multipart_threshold": 10 * 1024 * 1024,  # 10MB
+        "use_threads": True,
+        "max_bandwidth": None,
+        "io_chunksize": 256 * 1024,  # 256KB
+        "num_download_attempts": 5,
+        "max_io_queue": 100,
+        "preferred_transfer_client": "auto",
+    }

+    def __init__(
+        self,
+        index_build_params: IndexBuildParameters,
+        object_store_config: Dict[str, Any],
+    ):
+        """Initialize the S3ObjectStore with the given parameters and configuration.
+
+        Args:
+            index_build_params (IndexBuildParameters): Contains bucket name and other
+                index building parameters
+            object_store_config (Dict[str, Any]): Configuration dictionary containing:
+                - retries (int): Maximum number of retry attempts (default: 3)
+                - region (str): AWS region name (default: 'us-west-2')
+                - transfer_config (Dict[str, Any]): S3 TransferConfig parameters
+                - debug (bool): Turns on debug mode (default: False)
+        """
+        self.bucket = index_build_params.container_name
+        self.max_retries = object_store_config.get("retries", 3)
+        self.region = object_store_config.get("region", "us-west-2")
+
+        self.s3_client = get_boto3_client(region=self.region, retries=self.max_retries)
+
+        transfer_config = object_store_config.get("transfer_config", {})
+        # Create transfer config with validated parameters
+        self.transfer_config = self._create_transfer_config(transfer_config)
+
+        self.debug = object_store_config.get("debug", False)
+
+        # Debug mode provides progress tracking on downloads and uploads
+        if self.debug:
+            self._read_progress = 0
+            self._read_progress_lock = threading.Lock()
+            self._write_progress = 0
+            self._write_progress_lock = threading.Lock()
+
+    def _create_transfer_config(self, custom_config: Dict[str, Any]) -> TransferConfig:
+        """
+        Creates a TransferConfig with custom parameters while maintaining defaults
+        for unspecified values.
+
+        Args:
+            custom_config: Dictionary of custom transfer configuration parameters
+
+        Returns:
+            TransferConfig: Configured transfer configuration object
+        """
+        # Start with default values
+        config_params = self.DEFAULT_TRANSFER_CONFIG.copy()
+
+        # Update with custom values, only if they are valid parameters
+        for key, value in custom_config.items():
+            if key in self.DEFAULT_TRANSFER_CONFIG:
+                config_params[key] = value
+            else:
+                logger.warning(f"Ignoring invalid transfer config parameter: {key}")
+
+        # Remove None values to let boto3 use its internal defaults
+        config_params = {k: v for k, v in config_params.items() if v is not None}
+
+        return TransferConfig(**config_params)
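+
+    # For example, a caller could tune multipart behavior through
+    # object_store_config (a sketch; the values are illustrative, and the
+    # unknown key is included only to show that it is ignored with a warning):
+    #
+    #   store = S3ObjectStore(index_build_params, {
+    #       "region": "us-west-2",
+    #       "transfer_config": {
+    #           "max_concurrency": 8,
+    #           "multipart_chunksize": 32 * 1024 * 1024,  # 32MB parts
+    #           "not_a_real_option": True,  # logged and ignored
+    #       },
+    #   })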
+
+    def read_blob(self, remote_store_path: str, bytes_buffer: BytesIO) -> None:
+        """
+        Downloads a blob from S3 to the provided bytes buffer, with retry logic.
+
+        Args:
+            remote_store_path (str): The S3 key (path) of the object to download
+            bytes_buffer (BytesIO): A bytes buffer to store the downloaded data
+
+        Returns:
+            None
+
+        Note:
+            - boto3 automatically handles retries for the exceptions given here:
+              https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html
+            - Resets the buffer position to 0 after a successful download
+            - Uses the configured TransferConfig for download parameters
+            - boto3 may perform the download in parallel multipart chunks,
+              based on the TransferConfig settings
+
+        Raises:
+            BlobError: If the download fails after all retry attempts or encounters a non-retryable error
+        """
+
+        callback_func = None
+
+        # Set up progress callback, if debug mode is on
+        if self.debug:
+            with self._read_progress_lock:
+                self._read_progress = 0
+
+            def callback(bytes_transferred):
+                with self._read_progress_lock:
+                    self._read_progress += bytes_transferred
+                    logger.info(f"Downloaded: {self._read_progress:,} bytes")
+
+            callback_func = callback
+
+        try:
+            self.s3_client.download_fileobj(
+                self.bucket,
+                remote_store_path,
+                bytes_buffer,
+                Config=self.transfer_config,
+                Callback=callback_func,
+            )
+            return
+        except ClientError as e:
+            raise BlobError(f"Error downloading file: {e}") from e
+
+    def write_blob(self, local_file_path: str, remote_store_path: str) -> None:
+        """
+        Uploads a local file to S3, with retry logic.
+
+        Args:
+            local_file_path (str): Path to the local file to be uploaded
+            remote_store_path (str): The S3 key (path) where the file will be stored
+
+        Returns:
+            None
+
+        Note:
+            - boto3 automatically handles retries for the exceptions given here:
+              https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html
+            - Uses the configured TransferConfig for upload parameters
+            - boto3 may perform the upload in parallel multipart chunks, based on the TransferConfig settings
+
+        Raises:
+            BlobError: If the upload fails after all retry attempts or encounters a non-retryable error
+        """
+
+        callback_func = None
+        if self.debug:
+            # Set up progress callback, if debug mode is on
+            with self._write_progress_lock:
+                self._write_progress = 0
+
+            def callback(bytes_amount):
+                with self._write_progress_lock:
+                    self._write_progress += bytes_amount
+                    logger.info(f"Uploaded: {self._write_progress:,} bytes")
+
+            callback_func = callback
+
+        try:
+            self.s3_client.upload_file(
+                local_file_path,
+                self.bucket,
+                remote_store_path,
+                Config=self.transfer_config,
+                Callback=callback_func,
+            )
+            return
+        except ClientError as e:
+            raise BlobError(f"Error uploading file: {e}") from e
diff --git a/remote_vector_index_builder/core/object_store/types.py b/remote_vector_index_builder/core/object_store/types.py
new file mode 100644
index 0000000..90a633e
--- /dev/null
+++ b/remote_vector_index_builder/core/object_store/types.py
@@ -0,0 +1,22 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+
+from enum import Enum
+
+
+class ObjectStoreType(str, Enum):
+    """
+    Enumeration of supported object store types.
+
+    This enum inherits from both str and Enum to provide string-based
+    enumeration values, allowing for easy serialization and comparison.
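+
+    For example, members compare equal to their plain string values:
+
+        >>> ObjectStoreType.S3 == "s3"
+        True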
+
+    Attributes:
+        S3 (str): Represents the Amazon S3 object storage service
+    """
+
+    S3 = "s3"
diff --git a/remote_vector_index_builder/core/tasks.py b/remote_vector_index_builder/core/tasks.py
new file mode 100644
index 0000000..7866baf
--- /dev/null
+++ b/remote_vector_index_builder/core/tasks.py
@@ -0,0 +1,130 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+
+"""
+core.tasks
+~~~~~~~~~~~~~~~~~
+
+This module contains the tasks necessary to build an index on GPUs.
+These tasks must be run in the following sequence, for a given build request:
+1. create_vectors_dataset
+2. build_index
+3. upload_index
+
+"""
+import logging
+from io import BytesIO
+from typing import Any, Dict
+
+from core.common.models.index_build_parameters import IndexBuildParameters
+from core.common.models.vectors_dataset import VectorsDataset
+from core.object_store.object_store_factory import ObjectStoreFactory
+
+logger = logging.getLogger(__name__)
+
+# TODO: Create build_index task
+
+
+def create_vectors_dataset(
+    index_build_params: IndexBuildParameters,
+    object_store_config: Dict[str, Any],
+    vector_bytes_buffer: BytesIO,
+    doc_id_bytes_buffer: BytesIO,
+) -> VectorsDataset:
+    """
+    Downloads vector and document ID data from object storage and creates a VectorsDataset.
+
+    This function performs the first step in the index building process by:
+    1. Creating an appropriate object store instance
+    2. Downloading vector data from the specified vector_path, into the vector_bytes_buffer
+    3. Downloading document IDs from the specified doc_id_path, into the doc_id_bytes_buffer
+    4. Combining them into a VectorsDataset object
+
+    Args:
+        index_build_params (IndexBuildParameters): Contains the configuration for the index build,
+            including:
+            - vector_path: Path to the vector data in object storage
+            - doc_id_path: Path to the document IDs in object storage
+            - repository_type: Type of object store to use
+        object_store_config (Dict[str, Any]): Configuration for the object store
+            containing connection details
+        vector_bytes_buffer: Buffer for storing vector binary data
+        doc_id_bytes_buffer: Buffer for storing doc id binary data
+
+    Returns:
+        VectorsDataset: A dataset object containing:
+            - The downloaded vectors in the specified format
+            - Associated document IDs for each vector
+
+    Note:
+        - Uses BytesIO buffers for memory-efficient data handling
+        - The caller is responsible for closing each buffer
+        - Before closing the buffers, the caller must call free_vectors_space on the
+          VectorsDataset object, to remove all references to the underlying data.
+        - Both vector and document ID files must exist in object storage
+        - The number of vectors must match the number of document IDs
+        - Memory usage scales with the size of the vector and document ID data
+
+    Raises:
+        BlobError: If there are issues accessing or reading from object storage
+        VectorsDatasetError: If there are issues parsing the vectors and/or doc IDs into a VectorsDataset
+        UnsupportedVectorsDataTypeError: If the index_build_params.data_type is not supported
+        UnsupportedObjectStoreTypeError: If the index_build_params.repository_type is not supported
+
+    """
+    object_store = ObjectStoreFactory.create_object_store(
+        index_build_params, object_store_config
+    )
+
+    object_store.read_blob(index_build_params.vector_path, vector_bytes_buffer)
+    object_store.read_blob(index_build_params.doc_id_path, doc_id_bytes_buffer)
+
+    return VectorsDataset.parse(
+        vector_bytes_buffer,
+        doc_id_bytes_buffer,
+        index_build_params.dimension,
+        index_build_params.doc_count,
+        index_build_params.data_type,
+    )
+
+
+def upload_index(
+    index_build_params: IndexBuildParameters,
+    object_store_config: Dict[str, Any],
+    index_local_path: str,
+) -> None:
+    """
+    Uploads a built index from a local path to the configured object store.
+
+    Args:
+        index_build_params (IndexBuildParameters): Parameters for the index build process,
+            containing the vector path which is used to determine the upload destination
+        object_store_config (Dict[str, Any]): Configuration dictionary for the object store
+            containing connection details
+        index_local_path (str): Local filesystem path where the built index is stored
+
+    Returns:
+        None
+
+    Note:
+        - Creates an object store instance based on the provided configuration
+        - Uses the vector_path from index_build_params to determine the upload destination
+        - The index_local_path must exist and be readable
+        - The function assumes index_build_params has been validated by Pydantic
+
+    Raises:
+        BlobError: If there are issues uploading to the object store
+        UnsupportedObjectStoreTypeError: If the index_build_params.repository_type is not supported
+    """
+    object_store = ObjectStoreFactory.create_object_store(
+        index_build_params, object_store_config
+    )
+
+    # vector_path is unique for each index build request, so we can simply append the local path
+    index_remote_path = index_build_params.vector_path + index_local_path
+
+    object_store.write_blob(index_local_path, index_remote_path)
diff --git a/remote_vector_index_builder/requirements.txt b/remote_vector_index_builder/requirements.txt
new file mode 100644
index 0000000..96c9f4f
--- /dev/null
+++ b/remote_vector_index_builder/requirements.txt
@@ -0,0 +1,3 @@
+pydantic>=2.7.0,<3.0.0
+boto3>=1.36,<2.0.0
+numpy>=1.26,<2.0.0
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index b3066ad..eebf4d4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -16,3 +16,9 @@ show-source = True
 [black]
 max-line-length = 120
 target-version = 'py38'
+
+[mypy]
+ignore_missing_imports=True
+
+[tool:pytest]
+pythonpath = remote_vector_index_builder
\ No newline at end of file
diff --git a/test_remote_vector_index_builder/test_core/__init__.py b/test_remote_vector_index_builder/test_core/__init__.py
new file mode 100644
index 0000000..fe22b86
--- /dev/null
+++ b/test_remote_vector_index_builder/test_core/__init__.py
@@ -0,0 +1,6 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
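Taken together, the new tasks compose like this (a minimal usage sketch; the
bucket, paths, counts, and local index path are illustrative, and the GPU
build_index step between download and upload is still a TODO):

    from io import BytesIO

    from core.common.models.index_build_parameters import IndexBuildParameters
    from core.tasks import create_vectors_dataset, upload_index

    params = IndexBuildParameters(
        container_name="my-vector-bucket",        # hypothetical bucket
        vector_path="builds/abc123/vecs.knnvec",  # must end with .knnvec
        doc_id_path="builds/abc123/doc_ids.bin",
        dimension=3,
        doc_count=2,
    )
    config = {"region": "us-west-2"}

    vec_buf, id_buf = BytesIO(), BytesIO()
    try:
        dataset = create_vectors_dataset(params, config, vec_buf, id_buf)
        # ... build_index would run here once implemented ...
        dataset.free_vectors_space()  # drop references before closing buffers
    finally:
        vec_buf.close()
        id_buf.close()

    upload_index(params, config, "/tmp/index.bin")  # hypothetical local path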
diff --git a/test_remote_vector_index_builder/test_core/common/__init__.py b/test_remote_vector_index_builder/test_core/common/__init__.py
new file mode 100644
index 0000000..fe22b86
--- /dev/null
+++ b/test_remote_vector_index_builder/test_core/common/__init__.py
@@ -0,0 +1,6 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
diff --git a/test_remote_vector_index_builder/test_core/common/models/__init__.py b/test_remote_vector_index_builder/test_core/common/models/__init__.py
new file mode 100644
index 0000000..fe22b86
--- /dev/null
+++ b/test_remote_vector_index_builder/test_core/common/models/__init__.py
@@ -0,0 +1,6 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
diff --git a/test_remote_vector_index_builder/test_core/common/models/test_vectors_dataset.py b/test_remote_vector_index_builder/test_core/common/models/test_vectors_dataset.py
new file mode 100644
index 0000000..06a8cbe
--- /dev/null
+++ b/test_remote_vector_index_builder/test_core/common/models/test_vectors_dataset.py
@@ -0,0 +1,167 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+from io import BytesIO
+from unittest.mock import patch
+
+import numpy as np
+import pytest
+from core.common.exceptions import UnsupportedVectorsDataTypeError, VectorsDatasetError
+from core.common.models.index_build_parameters import DataType
+from core.common.models.vectors_dataset import VectorsDataset
+
+
+@pytest.fixture
+def sample_vectors():
+    # Create sample float32 vectors (2 vectors of dimension 3)
+    return np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype="