From d54e4b6a747f3940a19989a56095a71540e4c0d8 Mon Sep 17 00:00:00 2001 From: Zhuofu Chen <59316330+chenzhuofu@users.noreply.github.com> Date: Wed, 1 May 2024 01:51:57 +0800 Subject: [PATCH 1/7] doc: fix c++ serving example (#1372) Co-authored-by: Gabriele Oliaro --- .github/README.md | 2 +- SERVE.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/README.md b/.github/README.md index 4a2a881c8..c4f6baada 100644 --- a/.github/README.md +++ b/.github/README.md @@ -178,7 +178,7 @@ A C++ example is available at [this folder](../inference/spec_infer/). After bui For example, you can use the following command line to serve a LLaMA-7B or LLaMA-13B model on 4 GPUs and use two collectively boost-tuned LLaMA-68M models for speculative inference. ```bash -./inference/spec_infer/spec_infer -ll:gpu 4 -ll:fsize 14000 -ll:zsize 30000 -llm-model meta-llama/Llama-2-7b-hf -ssm-model JackFram/llama-68m -prompt /path/to/prompt.json -tensor-parallelism-degree 4 --fusion +./inference/spec_infer/spec_infer -ll:gpu 4 -ll:cpu 4 -ll:fsize 14000 -ll:zsize 30000 -llm-model meta-llama/Llama-2-7b-hf -ssm-model JackFram/llama-68m -prompt /path/to/prompt.json -tensor-parallelism-degree 4 --fusion ``` diff --git a/SERVE.md b/SERVE.md index e9bab3d70..9472d50a6 100644 --- a/SERVE.md +++ b/SERVE.md @@ -126,7 +126,7 @@ A C++ example is available at [this folder](../inference/spec_infer/). After bui For example, you can use the following command line to serve a LLaMA-7B or LLaMA-13B model on 4 GPUs and use two collectively boost-tuned LLaMA-68M models for speculative inference. 
```bash -./inference/spec_infer/spec_infer -ll:gpu 4 -ll:fsize 14000 -ll:zsize 30000 -llm-model meta-llama/Llama-2-7b-hf -ssm-model JackFram/llama-68m -prompt /path/to/prompt.json -tensor-parallelism-degree 4 --fusion +./inference/spec_infer/spec_infer -ll:gpu 4 -ll:cpu 4 -ll:fsize 14000 -ll:zsize 30000 -llm-model meta-llama/Llama-2-7b-hf -ssm-model JackFram/llama-68m -prompt /path/to/prompt.json -tensor-parallelism-degree 4 --fusion ``` From b90771a376fddbddf09af3f23e4ecae57911438e Mon Sep 17 00:00:00 2001 From: Zhihao Jia Date: Thu, 30 May 2024 14:24:42 -0700 Subject: [PATCH 2/7] Update README.md --- .github/README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/README.md b/.github/README.md index c4f6baada..5aba2295d 100644 --- a/.github/README.md +++ b/.github/README.md @@ -4,12 +4,6 @@ --- -## News🔥: - -* [09/02/2023] Adding AMD GPU support, released Docker images for ROCM 5.3->5.6 -* [08/16/2023] Adding Starcoder model support -* [08/14/2023] Released Docker images for different CUDA versions - ## What is FlexFlow Serve The high computational and memory requirements of generative large language From 385c118447a8b1451de3641c8ecf437245b9248b Mon Sep 17 00:00:00 2001 From: FelixBrakel Date: Thu, 30 May 2024 23:39:10 +0200 Subject: [PATCH 3/7] Add examples for every layer in the python layer API (#1297) * Fix incorrect innode being checked * Add example for every layer on the FFModel python class --------- Co-authored-by: Gabriele Oliaro Co-authored-by: Zhihao Jia --- docs/source/python/layers.rst | 2 +- examples/python/native/ops/add.py | 45 ++++++++ .../ops/add_bias_residual_layer_norm.py | 78 +++++++++++++ examples/python/native/ops/arg_top_k.py | 61 ++++++++++ examples/python/native/ops/argmax.py | 55 +++++++++ examples/python/native/ops/batch_matmul.py | 0 examples/python/native/ops/batch_norm.py | 36 ++++++ examples/python/native/ops/beam_top_k.py | 58 ++++++++++ examples/python/native/ops/concat.py | 43 +++++++ 
examples/python/native/ops/conv2d.py | 45 ++++++++ examples/python/native/ops/cos.py | 44 +++++++ examples/python/native/ops/dense.py | 38 +++++++ examples/python/native/ops/divide.py | 48 ++++++++ examples/python/native/ops/dropout.py | 49 ++++++++ examples/python/native/ops/elu.py | 47 ++++++++ examples/python/native/ops/embedding.py | 39 +++++++ examples/python/native/ops/exp.py | 0 examples/python/native/ops/flat.py | 0 examples/python/native/ops/gather.py | 60 ++++++++++ examples/python/native/ops/gelu.py | 51 +++++++++ examples/python/native/ops/identity.py | 49 ++++++++ .../ops/inc_multihead_self_attention.py | 103 +++++++++++++++++ .../inc_multihead_self_attention_verify.py | 103 +++++++++++++++++ .../ops/inc_multiquery_self_attention.py | 107 ++++++++++++++++++ .../inc_multiquery_self_attention_verify.py | 107 ++++++++++++++++++ examples/python/native/ops/layer_norm.py | 48 ++++++++ examples/python/native/ops/max.py | 54 +++++++++ examples/python/native/ops/mean.py | 48 ++++++++ examples/python/native/ops/min.py | 54 +++++++++ .../python/native/ops/multihead_attention.py | 0 examples/python/native/ops/multiply.py | 45 ++++++++ examples/python/native/ops/pool2d.py | 36 ++++++ examples/python/native/ops/pow.py | 46 ++++++++ examples/python/native/ops/reduce_sum.py | 48 ++++++++ examples/python/native/ops/relu.py | 46 ++++++++ examples/python/native/ops/reshape.py | 41 +++++++ .../python/native/ops/residual_layer_norm.py | 93 +++++++++++++++ .../python/native/ops/residual_rms_norm.py | 80 +++++++++++++ examples/python/native/ops/reverse.py | 37 ++++++ examples/python/native/ops/rms_norm.py | 64 +++++++++++ examples/python/native/ops/rsqrt.py | 44 +++++++ examples/python/native/ops/sampling.py | 55 +++++++++ examples/python/native/ops/scalar_add.py | 53 +++++++++ examples/python/native/ops/scalar_multiply.py | 53 +++++++++ examples/python/native/ops/scalar_sub.py | 53 +++++++++ .../python/native/ops/scalar_true_divide.py | 53 +++++++++ 
examples/python/native/ops/sigmoid.py | 46 ++++++++ .../python/native/ops/sigmoid_silu_multi.py | 58 ++++++++++ examples/python/native/ops/sin.py | 44 +++++++ examples/python/native/ops/softmax.py | 46 ++++++++ .../ops/spec_inc_multihead_self_attention.py | 103 +++++++++++++++++ .../ops/spec_inc_multiquery_self_attention.py | 107 ++++++++++++++++++ examples/python/native/ops/split.py | 47 ++++++++ examples/python/native/ops/subtract.py | 45 ++++++++ examples/python/native/ops/tanh.py | 46 ++++++++ examples/python/native/ops/transpose.py | 38 +++++++ 56 files changed, 2898 insertions(+), 1 deletion(-) create mode 100644 examples/python/native/ops/add.py create mode 100644 examples/python/native/ops/add_bias_residual_layer_norm.py create mode 100644 examples/python/native/ops/arg_top_k.py create mode 100644 examples/python/native/ops/argmax.py create mode 100644 examples/python/native/ops/batch_matmul.py create mode 100644 examples/python/native/ops/batch_norm.py create mode 100644 examples/python/native/ops/beam_top_k.py create mode 100644 examples/python/native/ops/concat.py create mode 100644 examples/python/native/ops/conv2d.py create mode 100644 examples/python/native/ops/cos.py create mode 100644 examples/python/native/ops/dense.py create mode 100644 examples/python/native/ops/divide.py create mode 100644 examples/python/native/ops/dropout.py create mode 100644 examples/python/native/ops/elu.py create mode 100644 examples/python/native/ops/embedding.py create mode 100644 examples/python/native/ops/exp.py create mode 100644 examples/python/native/ops/flat.py create mode 100644 examples/python/native/ops/gather.py create mode 100644 examples/python/native/ops/gelu.py create mode 100644 examples/python/native/ops/identity.py create mode 100644 examples/python/native/ops/inc_multihead_self_attention.py create mode 100644 examples/python/native/ops/inc_multihead_self_attention_verify.py create mode 100644 examples/python/native/ops/inc_multiquery_self_attention.py 
create mode 100644 examples/python/native/ops/inc_multiquery_self_attention_verify.py create mode 100644 examples/python/native/ops/layer_norm.py create mode 100644 examples/python/native/ops/max.py create mode 100644 examples/python/native/ops/mean.py create mode 100644 examples/python/native/ops/min.py create mode 100644 examples/python/native/ops/multihead_attention.py create mode 100644 examples/python/native/ops/multiply.py create mode 100644 examples/python/native/ops/pool2d.py create mode 100644 examples/python/native/ops/pow.py create mode 100644 examples/python/native/ops/reduce_sum.py create mode 100644 examples/python/native/ops/relu.py create mode 100644 examples/python/native/ops/reshape.py create mode 100644 examples/python/native/ops/residual_layer_norm.py create mode 100644 examples/python/native/ops/residual_rms_norm.py create mode 100644 examples/python/native/ops/reverse.py create mode 100644 examples/python/native/ops/rms_norm.py create mode 100644 examples/python/native/ops/rsqrt.py create mode 100644 examples/python/native/ops/sampling.py create mode 100644 examples/python/native/ops/scalar_add.py create mode 100644 examples/python/native/ops/scalar_multiply.py create mode 100644 examples/python/native/ops/scalar_sub.py create mode 100644 examples/python/native/ops/scalar_true_divide.py create mode 100644 examples/python/native/ops/sigmoid.py create mode 100644 examples/python/native/ops/sigmoid_silu_multi.py create mode 100644 examples/python/native/ops/sin.py create mode 100644 examples/python/native/ops/softmax.py create mode 100644 examples/python/native/ops/spec_inc_multihead_self_attention.py create mode 100644 examples/python/native/ops/spec_inc_multiquery_self_attention.py create mode 100644 examples/python/native/ops/split.py create mode 100644 examples/python/native/ops/subtract.py create mode 100644 examples/python/native/ops/tanh.py create mode 100644 examples/python/native/ops/transpose.py diff --git 
a/docs/source/python/layers.rst b/docs/source/python/layers.rst index 91f12094e..1be91a8b1 100644 --- a/docs/source/python/layers.rst +++ b/docs/source/python/layers.rst @@ -3,7 +3,7 @@ Layers API ********** Layers are the basic building blocks of neural networks in FlexFlow. The inputs of a layer consists of a tensor or a list of tensors and some state variables, -and the outputs of a layer is a tensor or a list of tensors. +and the outputs of a layer is a tensor or a list of tensors. See the ``examples/python/native/ops`` directory of the FlexFlow repository (https://github.com/flexflow/FlexFlow) for an example of every layer. .. automodule:: flexflow.core.flexflow_cffi :noindex: diff --git a/examples/python/native/ops/add.py b/examples/python/native/ops/add.py new file mode 100644 index 000000000..50b9d16fd --- /dev/null +++ b/examples/python/native/ops/add.py @@ -0,0 +1,45 @@ +# The basis for this test of the 'add' operation is generated by ChatGPT using the manually created conv2d.py as a template. + + +import flexflow.core +import numpy as np +from flexflow.core import * + +def test_add(ffconfig, input_arr1: np.ndarray, input_arr2: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor1 = ffmodel.create_tensor(input_arr1.shape, DataType.DT_FLOAT) + input_tensor2 = ffmodel.create_tensor(input_arr2.shape, DataType.DT_FLOAT) + + out = ffmodel.add(input_tensor1, input_tensor2) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input1 = ffmodel.create_data_loader(input_tensor1, input_arr1) + dataloader_input2 = ffmodel.create_data_loader(input_tensor2, input_arr2) + + ffmodel.init_layers() + + dataloader_input1.reset() + dataloader_input1.next_batch(ffmodel) + + dataloader_input2.reset() + dataloader_input2.next_batch(ffmodel) + + ffmodel.forward() + + 
out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input1 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + input2 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + + _ = test_add(ffconfig, input1, input2) diff --git a/examples/python/native/ops/add_bias_residual_layer_norm.py b/examples/python/native/ops/add_bias_residual_layer_norm.py new file mode 100644 index 000000000..6e8dffbc9 --- /dev/null +++ b/examples/python/native/ops/add_bias_residual_layer_norm.py @@ -0,0 +1,78 @@ +from typing import List + +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_add_bias_residual_layer_norm(ffconfig, input_arr: np.ndarray, residual_arr: np.ndarray, axes: List[int], elementwise_affine: bool = True, eps: float = 1e-5, use_bias: bool = True, name=None): + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + residual_tensor = ffmodel.create_tensor(residual_arr.shape, DataType.DT_FLOAT) + + output_tensor, layer_norm_output = ffmodel.add_bias_residual_layer_norm( + input_tensor, + residual_tensor, + axes=axes, + elementwise_affine=elementwise_affine, + eps=eps, + use_bias=use_bias, + name="add_bias_residual_layer_norm_layer" + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + dataloader_residual = ffmodel.create_data_loader(residual_tensor, residual_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_residual.reset() + + dataloader_input.next_batch(ffmodel) + dataloader_residual.next_batch(ffmodel) + + ffmodel.forward() + + 
output_tensor.inline_map(ffmodel, ffconfig) + layer_norm_output.inline_map(ffmodel, ffconfig) + output_result = output_tensor.get_array(ffmodel, ffconfig) + layer_norm_result = layer_norm_output.get_array(ffmodel, ffconfig) + + return output_result, layer_norm_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + residual_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + + axes_to_normalize = [1, 2] # Example axes to normalize + + output_result, layer_norm_result = test_add_bias_residual_layer_norm( + ffconfig, + input_data, + residual_data, + axes=axes_to_normalize, + elementwise_affine=True, + eps=1e-5, + use_bias=True + ) + + print("Input Array:") + print(input_data) + print("\nResidual Array:") + print(residual_data) + print(f"\nOutput Array after applying add_bias_residual_layer_norm along axes {axes_to_normalize}:") + print(output_result) + print("\nLayer Norm Result:") + print(layer_norm_result) diff --git a/examples/python/native/ops/arg_top_k.py b/examples/python/native/ops/arg_top_k.py new file mode 100644 index 000000000..79edc5dfa --- /dev/null +++ b/examples/python/native/ops/arg_top_k.py @@ -0,0 +1,61 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_arg_top_k(ffconfig, input_arr: np.ndarray, k: int, sorted: bool, speculative_decoding: bool, name=None): + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + arg_top_k_output = ffmodel.arg_top_k( + input_tensor, + k, + sorted, + speculative_decoding, + name="arg_top_k_layer", + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_MEAN_SQUARED_ERROR, + metrics=[MetricsType.METRICS_MEAN_SQUARED_ERROR], + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + 
ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + arg_top_k_output.inline_map(ffmodel, ffconfig) + output_result = arg_top_k_output.get_array(ffmodel, ffconfig) + + return output_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 10).astype(np.float32) + k_value = 5 + sorted_value = True + speculative_decoding_value = False # Example value for speculative_decoding + + output_result = test_arg_top_k( + ffconfig, + input_data, + k=k_value, + sorted=sorted_value, + speculative_decoding=speculative_decoding_value, + ) + + print("Input Array:") + print(input_data) + print("\nOutput Array after applying arg_top_k:") + print(output_result) diff --git a/examples/python/native/ops/argmax.py b/examples/python/native/ops/argmax.py new file mode 100644 index 000000000..dda0e6b0b --- /dev/null +++ b/examples/python/native/ops/argmax.py @@ -0,0 +1,55 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_argmax(ffconfig, input_arr: np.ndarray, beam_search: bool, name=None): + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + argmax_output = ffmodel.argmax( + input_tensor, + beam_search, + name="argmax_layer", + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + argmax_output.inline_map(ffmodel, ffconfig) + output_result = argmax_output.get_array(ffmodel, ffconfig) + + return output_result + + +if __name__ == '__main__': + 
init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 10).astype(np.float32) + beam_search_value = True # Set to True or False based on your requirement + + output_result = test_argmax( + ffconfig, + input_data, + beam_search=beam_search_value, + ) + + print("Input Array:") + print(input_data) + print("\nOutput Array after applying argmax:") + print(output_result) diff --git a/examples/python/native/ops/batch_matmul.py b/examples/python/native/ops/batch_matmul.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/python/native/ops/batch_norm.py b/examples/python/native/ops/batch_norm.py new file mode 100644 index 000000000..b243e79d3 --- /dev/null +++ b/examples/python/native/ops/batch_norm.py @@ -0,0 +1,36 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def ff(ffconfig, input_arr: np.ndarray): + ffmodel = FFModel(ffconfig) + # TODO: convert input to ff tensor + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + out = ffmodel.batch_norm( + input_tensor + ) + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + _ = ff(ffconfig, input) diff --git a/examples/python/native/ops/beam_top_k.py b/examples/python/native/ops/beam_top_k.py new file mode 100644 index 000000000..cb2fdfb3d --- /dev/null +++ 
b/examples/python/native/ops/beam_top_k.py @@ -0,0 +1,58 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_beam_top_k(ffconfig, input_arr: np.ndarray, max_beam_size: int, sorted: bool, name=None): + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + beam_top_k_output = ffmodel.beam_top_k( + input_tensor, + max_beam_size, + sorted, + name="beam_top_k_layer", + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + beam_top_k_output.inline_map(ffmodel, ffconfig) + output_result = beam_top_k_output.get_array(ffmodel, ffconfig) + + return output_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 10).astype(np.float32) + max_beam_size_value = 3 + sorted_value = True + + output_result = test_beam_top_k( + ffconfig, + input_data, + max_beam_size=max_beam_size_value, + sorted=sorted_value, + ) + + print("Input Array:") + print(input_data) + print("\nOutput Array after applying beam_top_k:") + print(output_result) diff --git a/examples/python/native/ops/concat.py b/examples/python/native/ops/concat.py new file mode 100644 index 000000000..0088d7b84 --- /dev/null +++ b/examples/python/native/ops/concat.py @@ -0,0 +1,43 @@ +# The basis for this test of the 'concatenate' operation is generated by ChatGPT using the manually created conv2d.py as a template. 
+ + +import flexflow.core +import numpy as np +from flexflow.core import * + +def test_concatenate(ffconfig, input_arr1: np.ndarray, input_arr2: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor1 = ffmodel.create_tensor(input_arr1.shape, DataType.DT_FLOAT) + input_tensor2 = ffmodel.create_tensor(input_arr2.shape, DataType.DT_FLOAT) + + out = ffmodel.concat([input_tensor1, input_tensor2], axis=1) + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input1 = ffmodel.create_data_loader(input_tensor1, input_arr1) + dataloader_input2 = ffmodel.create_data_loader(input_tensor2, input_arr2) + + ffmodel.init_layers() + + dataloader_input1.reset() + dataloader_input1.next_batch(ffmodel) + + dataloader_input2.reset() + dataloader_input2.next_batch(ffmodel) + + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input1 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + input2 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + _ = test_concatenate(ffconfig, input1, input2) diff --git a/examples/python/native/ops/conv2d.py b/examples/python/native/ops/conv2d.py new file mode 100644 index 000000000..02b3646aa --- /dev/null +++ b/examples/python/native/ops/conv2d.py @@ -0,0 +1,45 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def ff(ffconfig, input_arr: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + out = ffmodel.conv2d( + input_tensor, + 32, + 3, + 3, + 1, + 1, + 1, + 1, + use_bias=False + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) 
+ ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + _ = ff(ffconfig, input) diff --git a/examples/python/native/ops/cos.py b/examples/python/native/ops/cos.py new file mode 100644 index 000000000..26f630768 --- /dev/null +++ b/examples/python/native/ops/cos.py @@ -0,0 +1,44 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_cos(ffconfig, input_arr: np.ndarray) -> np.ndarray: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + cos_output = ffmodel.cos(input_tensor, name="cos_layer") + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + ffmodel.forward() + + cos_output.inline_map(ffmodel, ffconfig) + cos_result = cos_output.get_array(ffmodel, ffconfig) + + return cos_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + cos_result = test_cos(ffconfig, input_data) + + print("Input Array:") + print(input_data) + 
print("\nOutput Array after applying cos function:") + print(cos_result) diff --git a/examples/python/native/ops/dense.py b/examples/python/native/ops/dense.py new file mode 100644 index 000000000..ec0a3dc65 --- /dev/null +++ b/examples/python/native/ops/dense.py @@ -0,0 +1,38 @@ +# The basis for this test of the 'dense' layer is generated by ChatGPT using the manually created conv2d.py as a template. + + +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_dense(ffconfig, input_arr: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + out = ffmodel.dense(input_tensor, 64, activation=ActiMode.AC_MODE_RELU) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input = np.random.randn(ffconfig.batch_size, 10).astype(np.float32) + _ = test_dense(ffconfig, input) diff --git a/examples/python/native/ops/divide.py b/examples/python/native/ops/divide.py new file mode 100644 index 000000000..419bf714a --- /dev/null +++ b/examples/python/native/ops/divide.py @@ -0,0 +1,48 @@ +# The basis for this test of the 'divide' operation is generated by ChatGPT using the manually created conv2d.py as a template. 
+ + +import flexflow.core +import numpy as np +from flexflow.core import * + +def test_divide(ffconfig, input_arr1: np.ndarray, input_arr2: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor1 = ffmodel.create_tensor(input_arr1.shape, DataType.DT_FLOAT) + input_tensor2 = ffmodel.create_tensor(input_arr2.shape, DataType.DT_FLOAT) + + out = ffmodel.divide(input_tensor1, input_tensor2) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input1 = ffmodel.create_data_loader(input_tensor1, input_arr1) + dataloader_input2 = ffmodel.create_data_loader(input_tensor2, input_arr2) + + ffmodel.init_layers() + + dataloader_input1.reset() + dataloader_input1.next_batch(ffmodel) + + dataloader_input2.reset() + dataloader_input2.next_batch(ffmodel) + + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input1 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + input2 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + + # Avoid division by zero in input2 + input2 = np.where(input2 == 0, 1e-6, input2) + + _ = test_divide(ffconfig, input1, input2) diff --git a/examples/python/native/ops/dropout.py b/examples/python/native/ops/dropout.py new file mode 100644 index 000000000..3aa44a5a5 --- /dev/null +++ b/examples/python/native/ops/dropout.py @@ -0,0 +1,49 @@ +# The basis for this test of the 'Dropout' layer is generated by ChatGPT using the manually created conv2d.py as a template. 
+ + +import flexflow.core +import numpy as np +from flexflow.core import * + +def test_dropout(ffconfig, input_arr: np.ndarray, dropout_rate: float = 0.5) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + # Apply Dropout layer + out = ffmodel.dropout(input_tensor, dropout_rate, 0) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + + # You can adjust the dropout rate as needed + dropout_rate_param = 0.5 + + result = test_dropout(ffconfig, input_data, dropout_rate_param) + + print("Input Data:") + print(input_data) + + print("\nResult after Dropout layer:") + print(result) diff --git a/examples/python/native/ops/elu.py b/examples/python/native/ops/elu.py new file mode 100644 index 000000000..7a6ef1f62 --- /dev/null +++ b/examples/python/native/ops/elu.py @@ -0,0 +1,47 @@ +# The basis for this test of the 'ELU' activation function is generated by ChatGPT using the manually created conv2d.py as a template. 
+ + +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_elu(ffconfig, input_arr: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + # Apply ELU activation + out = ffmodel.elu(input_tensor) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + + result = test_elu(ffconfig, input_data) + + print("Input Data:") + print(input_data) + + print("\nResult after ELU activation:") + print(result) diff --git a/examples/python/native/ops/embedding.py b/examples/python/native/ops/embedding.py new file mode 100644 index 000000000..34bced379 --- /dev/null +++ b/examples/python/native/ops/embedding.py @@ -0,0 +1,39 @@ +# The basis for this test of the 'embedding' layer is generated by ChatGPT using the manually created conv2d.py as a template. 
+
+
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+# Builds a model with a single embedding layer (sum aggregation), feeds one batch of integer ids through forward() and returns the layer output as a numpy array.
+def test_embedding(ffconfig, input_arr: np.ndarray, vocab_size: int, embedding_dim: int) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_INT32)
+
+    out = ffmodel.embedding(input_tensor, vocab_size, embedding_dim, AggrMode.AGGR_MODE_SUM)
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_input.next_batch(ffmodel)
+    ffmodel.forward()
+
+    out.inline_map(ffmodel, ffconfig)
+    return out.get_array(ffmodel, ffconfig)
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    vocab_size = 1000
+    embedding_dim = 50
+    input_data = np.random.randint(low=0, high=vocab_size, size=(ffconfig.batch_size, 10), dtype=np.int32)
+    _ = test_embedding(ffconfig, input_data, vocab_size, embedding_dim)
diff --git a/examples/python/native/ops/exp.py b/examples/python/native/ops/exp.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/python/native/ops/flat.py b/examples/python/native/ops/flat.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/python/native/ops/gather.py b/examples/python/native/ops/gather.py
new file mode 100644
index 000000000..e13b6e4c7
--- /dev/null
+++ b/examples/python/native/ops/gather.py
@@ -0,0 +1,60 @@
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single gather layer, feeds one batch of input_arr/index_arr through forward() and returns the layer output as a numpy array.
+def test_gather(ffconfig, input_arr: np.ndarray, index_arr: np.ndarray, dim: int, name=None) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT)
+    index_tensor = ffmodel.create_tensor(index_arr.shape, DataType.DT_INT32)
+
+    gather_output = ffmodel.gather(
+        input_tensor,
+        index_tensor,
+        dim,
+        name=name or "gather_layer"
+    )
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+    dataloader_index = ffmodel.create_data_loader(index_tensor, index_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_index.reset()
+
+    dataloader_input.next_batch(ffmodel)
+    dataloader_index.next_batch(ffmodel)
+
+    ffmodel.forward()
+
+    gather_output.inline_map(ffmodel, ffconfig)
+    output_result = gather_output.get_array(ffmodel, ffconfig)
+
+    return output_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+    # The index array is given the input's shape (same rank, as in torch.gather); its values are valid positions along dim_to_gather.
+    input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+    dim_to_gather = 2  # Example dimension to gather along
+    index_data = np.random.randint(0, input_data.shape[dim_to_gather], size=input_data.shape).astype(np.int32)
+
+    output_result = test_gather(ffconfig, input_data, index_data, dim=dim_to_gather)
+
+    print("Input Array:")
+    print(input_data)
+    print("\nIndex Array:")
+    print(index_data)
+    print(f"\nOutput Array after applying gather along dimension {dim_to_gather}:")
+    print(output_result)
diff --git a/examples/python/native/ops/gelu.py b/examples/python/native/ops/gelu.py
new file mode 100644
index 000000000..84fabd36e
--- /dev/null
+++ b/examples/python/native/ops/gelu.py
@@ -0,0 +1,51 @@
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single gelu layer, feeds one batch of input_arr through forward() and returns the layer output as a numpy array.
+def test_gelu(ffconfig, input_arr: np.ndarray, inplace: bool = True, name=None) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT)
+
+    gelu_output = ffmodel.gelu(
+        input_tensor,
+        inplace=inplace,
+        name=name or "gelu_layer"
+    )
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_input.next_batch(ffmodel)
+
+    ffmodel.forward()
+
+    gelu_output.inline_map(ffmodel, ffconfig)
+    output_result = gelu_output.get_array(ffmodel, ffconfig)
+
+    return output_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+    inplace_flag = True  # Example inplace flag
+
+    output_result = test_gelu(ffconfig, input_data, inplace=inplace_flag)
+
+    print("Input Array:")
+    print(input_data)
+    print(f"\nOutput Array after applying gelu activation function (inplace={inplace_flag}):")
+    print(output_result)
diff --git a/examples/python/native/ops/identity.py b/examples/python/native/ops/identity.py
new file mode 100644
index 000000000..fbf63e717
--- /dev/null
+++ b/examples/python/native/ops/identity.py
@@ -0,0 +1,49 @@
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single identity layer, feeds one batch of input_arr through forward() and returns the layer output as a numpy array.
+def test_identity(ffconfig, input_arr: np.ndarray, name=None) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT)
+
+    identity_output = ffmodel.identity(
+        input_tensor,
+        name=name or "identity_layer"
+    )
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_input.next_batch(ffmodel)
+
+    ffmodel.forward()
+
+    identity_output.inline_map(ffmodel, ffconfig)
+    output_result = identity_output.get_array(ffmodel, ffconfig)
+
+    return output_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+
+    output_result = test_identity(ffconfig, input_data)
+
+    print("Input Array:")
+    print(input_data)
+    print("\nOutput Array after applying identity function:")
+    print(output_result)
diff --git a/examples/python/native/ops/inc_multihead_self_attention.py b/examples/python/native/ops/inc_multihead_self_attention.py
new file mode 100644
index 000000000..dce7bd565
--- /dev/null
+++ b/examples/python/native/ops/inc_multihead_self_attention.py
@@ -0,0 +1,103 @@
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single inc_multihead_self_attention layer, feeds one batch of input_arr through forward() and returns the layer output as a numpy array.
+def test_inc_multihead_self_attention(
+    ffconfig,
+    input_arr: np.ndarray,
+    embed_dim: int,
+    num_heads: int,
+    kdim: int = 0,
+    vdim: int = 0,
+    dropout: float = 0.0,
+    bias: bool = True,
+    add_bias_kv: bool = False,
+    add_zero_attn: bool = False,
+    data_type: DataType = DataType.DT_NONE,
+    kernel_initializer=None,
+    apply_rotary_embedding: bool = False,
+    scaling_query: bool = False,
+    scaling_factor: float = 1.0,
+    qk_prod_scaling: bool = True,
+    position_bias: bool = False,
+    name=None,
+) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, data_type)
+
+    inc_multihead_self_attention_output = ffmodel.inc_multihead_self_attention(
+        input_tensor,
+        embed_dim,
+        num_heads,
+        kdim=kdim,
+        vdim=vdim,
+        dropout=dropout,
+        bias=bias,
+        add_bias_kv=add_bias_kv,
+        add_zero_attn=add_zero_attn,
+        data_type=data_type,
+        kernel_initializer=kernel_initializer,
+        apply_rotary_embedding=apply_rotary_embedding,
+        scaling_query=scaling_query,
+        scaling_factor=scaling_factor,
+        qk_prod_scaling=qk_prod_scaling,
+        position_bias=position_bias,
+        name=name or "inc_multihead_self_attention_layer",
+    )
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_input.next_batch(ffmodel)
+
+    ffmodel.forward()
+
+    inc_multihead_self_attention_output.inline_map(ffmodel, ffconfig)
+    output_result = inc_multihead_self_attention_output.get_array(ffmodel, ffconfig)
+
+    return output_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data = np.random.randn(ffconfig.batch_size, 10, 20).astype(np.float32)
+    embed_dim_value = 64
+    num_heads_value = 8
+
+    output_result = test_inc_multihead_self_attention(
+        ffconfig,
+        input_data,
+        embed_dim=embed_dim_value,
+        num_heads=num_heads_value,
+        kdim=0,  # Example value for kdim
+        vdim=0,  # Example value for vdim
+        dropout=0.1,  # Example value for dropout
+        bias=True,
+        add_bias_kv=False,
+        add_zero_attn=False,
+        data_type=DataType.DT_FLOAT,
+        kernel_initializer=None,  # Example value for kernel_initializer
+        apply_rotary_embedding=False,
+        scaling_query=False,
+        scaling_factor=1.0,
+        qk_prod_scaling=True,
+        position_bias=False,
+    )
+
+    print("Input Array:")
+    print(input_data)
+    print("\nOutput Array after applying inc_multihead_self_attention:")
+    print(output_result)
diff --git a/examples/python/native/ops/inc_multihead_self_attention_verify.py b/examples/python/native/ops/inc_multihead_self_attention_verify.py
new file mode 100644
index 000000000..f6dc8e393
--- /dev/null
+++ b/examples/python/native/ops/inc_multihead_self_attention_verify.py
@@ -0,0 +1,103 @@
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single inc_multihead_self_attention_verify layer, feeds one batch of input_arr through forward() and returns the layer output as a numpy array.
+def test_inc_multihead_self_attention_verify(
+    ffconfig,
+    input_arr: np.ndarray,
+    embed_dim: int,
+    num_heads: int,
+    kdim: int = 0,
+    vdim: int = 0,
+    dropout: float = 0.0,
+    bias: bool = True,
+    add_bias_kv: bool = False,
+    add_zero_attn: bool = False,
+    data_type: DataType = DataType.DT_NONE,
+    kernel_initializer=None,
+    apply_rotary_embedding: bool = False,
+    scaling_query: bool = False,
+    scaling_factor: float = 1.0,
+    qk_prod_scaling: bool = True,
+    position_bias: bool = False,
+    name=None,
+) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, data_type)
+
+    inc_multihead_self_attention_verify_output = ffmodel.inc_multihead_self_attention_verify(
+        input_tensor,
+        embed_dim,
+        num_heads,
+        kdim=kdim,
+        vdim=vdim,
+        dropout=dropout,
+        bias=bias,
+        add_bias_kv=add_bias_kv,
+        add_zero_attn=add_zero_attn,
+        data_type=data_type,
+        kernel_initializer=kernel_initializer,
+        apply_rotary_embedding=apply_rotary_embedding,
+        scaling_query=scaling_query,
+        scaling_factor=scaling_factor,
+        qk_prod_scaling=qk_prod_scaling,
+        position_bias=position_bias,
+        name=name or "inc_multihead_self_attention_verify_layer",
+    )
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_input.next_batch(ffmodel)
+
+    ffmodel.forward()
+
+    inc_multihead_self_attention_verify_output.inline_map(ffmodel, ffconfig)
+    output_result = inc_multihead_self_attention_verify_output.get_array(ffmodel, ffconfig)
+
+    return output_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data = np.random.randn(ffconfig.batch_size, 10, 20).astype(np.float32)
+    embed_dim_value = 64
+    num_heads_value = 8
+
+    output_result = test_inc_multihead_self_attention_verify(
+        ffconfig,
+        input_data,
+        embed_dim=embed_dim_value,
+        num_heads=num_heads_value,
+        kdim=0,  # Example value for kdim
+        vdim=0,  # Example value for vdim
+        dropout=0.1,  # Example value for dropout
+        bias=True,
+        add_bias_kv=False,
+        add_zero_attn=False,
+        data_type=DataType.DT_FLOAT,
+        kernel_initializer=None,  # Example value for kernel_initializer
+        apply_rotary_embedding=False,
+        scaling_query=False,
+        scaling_factor=1.0,
+        qk_prod_scaling=True,
+        position_bias=False,
+    )
+
+    print("Input Array:")
+    print(input_data)
+    print("\nOutput Array after applying inc_multihead_self_attention_verify:")
+    print(output_result)
diff --git a/examples/python/native/ops/inc_multiquery_self_attention.py b/examples/python/native/ops/inc_multiquery_self_attention.py
new file mode 100644
index 000000000..33390ab1f
--- /dev/null
+++ b/examples/python/native/ops/inc_multiquery_self_attention.py
@@ -0,0 +1,107 @@
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single inc_multiquery_self_attention layer, feeds one batch of input_arr through forward() and returns the layer output as a numpy array.
+def test_inc_multiquery_self_attention(
+    ffconfig,
+    input_arr: np.ndarray,
+    embed_dim: int,
+    num_q_heads: int,
+    num_kv_heads: int,
+    kdim: int = 0,
+    vdim: int = 0,
+    dropout: float = 0.0,
+    bias: bool = True,
+    add_bias_kv: bool = False,
+    add_zero_attn: bool = False,
+    data_type: DataType = DataType.DT_NONE,
+    kernel_initializer=None,
+    apply_rotary_embedding: bool = False,
+    scaling_query: bool = False,
+    scaling_factor: float = 1.0,
+    qk_prod_scaling: bool = True,
+    position_bias: bool = False,
+    name=None,
+) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, data_type)
+
+    inc_multiquery_self_attention_output = ffmodel.inc_multiquery_self_attention(
+        input_tensor,
+        embed_dim,
+        num_q_heads,
+        num_kv_heads,
+        kdim=kdim,
+        vdim=vdim,
+        dropout=dropout,
+        bias=bias,
+        add_bias_kv=add_bias_kv,
+        add_zero_attn=add_zero_attn,
+        data_type=data_type,
+        kernel_initializer=kernel_initializer,
+        apply_rotary_embedding=apply_rotary_embedding,
+        scaling_query=scaling_query,
+        scaling_factor=scaling_factor,
+        qk_prod_scaling=qk_prod_scaling,
+        position_bias=position_bias,
+        name=name or "inc_multiquery_self_attention_layer",
+    )
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_input.next_batch(ffmodel)
+
+    ffmodel.forward()
+
+    inc_multiquery_self_attention_output.inline_map(ffmodel, ffconfig)
+    output_result = inc_multiquery_self_attention_output.get_array(ffmodel, ffconfig)
+
+    return output_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data = np.random.randn(ffconfig.batch_size, 10, 20).astype(np.float32)
+    embed_dim_value = 64
+    num_q_heads_value = 4
+    num_kv_heads_value = 4
+
+    output_result = test_inc_multiquery_self_attention(
+        ffconfig,
+        input_data,
+        embed_dim=embed_dim_value,
+        num_q_heads=num_q_heads_value,
+        num_kv_heads=num_kv_heads_value,
+        kdim=0,  # Example value for kdim
+        vdim=0,  # Example value for vdim
+        dropout=0.1,  # Example value for dropout
+        bias=True,
+        add_bias_kv=False,
+        add_zero_attn=False,
+        data_type=DataType.DT_FLOAT,
+        kernel_initializer=None,  # Example value for kernel_initializer
+        apply_rotary_embedding=False,
+        scaling_query=False,
+        scaling_factor=1.0,
+        qk_prod_scaling=True,
+        position_bias=False,
+    )
+
+    print("Input Array:")
+    print(input_data)
+    print("\nOutput Array after applying inc_multiquery_self_attention:")
+    print(output_result)
diff --git a/examples/python/native/ops/inc_multiquery_self_attention_verify.py b/examples/python/native/ops/inc_multiquery_self_attention_verify.py
new file mode 100644
index 000000000..69a76f68b
--- /dev/null
+++ b/examples/python/native/ops/inc_multiquery_self_attention_verify.py
@@ -0,0 +1,107 @@
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single inc_multiquery_self_attention_verify layer, feeds one batch of input_arr through forward() and returns the layer output as a numpy array.
+def test_inc_multiquery_self_attention_verify(
+    ffconfig,
+    input_arr: np.ndarray,
+    embed_dim: int,
+    num_q_heads: int,
+    num_kv_heads: int,
+    kdim: int = 0,
+    vdim: int = 0,
+    dropout: float = 0.0,
+    bias: bool = True,
+    add_bias_kv: bool = False,
+    add_zero_attn: bool = False,
+    data_type: DataType = DataType.DT_NONE,
+    kernel_initializer=None,
+    apply_rotary_embedding: bool = False,
+    scaling_query: bool = False,
+    scaling_factor: float = 1.0,
+    qk_prod_scaling: bool = True,
+    position_bias: bool = False,
+    name=None,
+) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, data_type)
+
+    inc_multiquery_self_attention_verify_output = ffmodel.inc_multiquery_self_attention_verify(
+        input_tensor,
+        embed_dim,
+        num_q_heads,
+        num_kv_heads,
+        kdim=kdim,
+        vdim=vdim,
+        dropout=dropout,
+        bias=bias,
+        add_bias_kv=add_bias_kv,
+        add_zero_attn=add_zero_attn,
+        data_type=data_type,
+        kernel_initializer=kernel_initializer,
+        apply_rotary_embedding=apply_rotary_embedding,
+        scaling_query=scaling_query,
+        scaling_factor=scaling_factor,
+        qk_prod_scaling=qk_prod_scaling,
+        position_bias=position_bias,
+        name=name or "inc_multiquery_self_attention_verify_layer",
+    )
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_input.next_batch(ffmodel)
+
+    ffmodel.forward()
+
+    inc_multiquery_self_attention_verify_output.inline_map(ffmodel, ffconfig)
+    output_result = inc_multiquery_self_attention_verify_output.get_array(ffmodel, ffconfig)
+
+    return output_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data = np.random.randn(ffconfig.batch_size, 10, 20).astype(np.float32)
+    embed_dim_value = 64
+    num_q_heads_value = 4
+    num_kv_heads_value = 4
+
+    output_result = test_inc_multiquery_self_attention_verify(
+        ffconfig,
+        input_data,
+        embed_dim=embed_dim_value,
+        num_q_heads=num_q_heads_value,
+        num_kv_heads=num_kv_heads_value,
+        kdim=0,  # Example value for kdim
+        vdim=0,  # Example value for vdim
+        dropout=0.1,  # Example value for dropout
+        bias=True,
+        add_bias_kv=False,
+        add_zero_attn=False,
+        data_type=DataType.DT_FLOAT,
+        kernel_initializer=None,  # Example value for kernel_initializer
+        apply_rotary_embedding=False,
+        scaling_query=False,
+        scaling_factor=1.0,
+        qk_prod_scaling=True,
+        position_bias=False,
+    )
+
+    print("Input Array:")
+    print(input_data)
+    print("\nOutput Array after applying inc_multiquery_self_attention_verify:")
+    print(output_result)
diff --git a/examples/python/native/ops/layer_norm.py b/examples/python/native/ops/layer_norm.py
new file mode 100644
index 000000000..b3cca93d6
--- /dev/null
+++ b/examples/python/native/ops/layer_norm.py
@@ -0,0 +1,48 @@
+from typing import List
+
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single layer_norm layer, feeds one batch of input_arr through forward() and returns the layer output as a numpy array.
+def test_layer_norm(ffconfig, input_arr: np.ndarray, axes: List[int], elementwise_affine: bool = True, eps: float = 1e-5, use_bias: bool = True, name=None) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT)
+
+    layer_norm_output = ffmodel.layer_norm(input_tensor, axes=axes, elementwise_affine=elementwise_affine, eps=eps, use_bias=use_bias, name=name or "layer_norm_layer")
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_input.next_batch(ffmodel)
+    ffmodel.forward()
+
+    layer_norm_output.inline_map(ffmodel, ffconfig)
+    layer_norm_result = layer_norm_output.get_array(ffmodel, ffconfig)
+
+    return layer_norm_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+    axes_to_normalize = [1, 2]  # Example axes to normalize
+
+    layer_norm_result = test_layer_norm(ffconfig, input_data, axes=axes_to_normalize, elementwise_affine=True, eps=1e-5, use_bias=True)
+
+    print("Input Array:")
+    print(input_data)
+    print(f"\nOutput Array after applying layer_norm function along axes {axes_to_normalize}:")
+    print(layer_norm_result)
diff --git a/examples/python/native/ops/max.py b/examples/python/native/ops/max.py
new file mode 100644
index 000000000..bf9c62940
--- /dev/null
+++ b/examples/python/native/ops/max.py
@@ -0,0 +1,54 @@
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single max layer over two inputs, feeds one batch of each through forward() and returns the layer output as a numpy array.
+def test_max(ffconfig, input_arr1: np.ndarray, input_arr2: np.ndarray) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor1 = ffmodel.create_tensor(input_arr1.shape, DataType.DT_FLOAT)
+    input_tensor2 = ffmodel.create_tensor(input_arr2.shape, DataType.DT_FLOAT)
+
+    max_output = ffmodel.max(input_tensor1, input_tensor2, name="max_layer")
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input1 = ffmodel.create_data_loader(input_tensor1, input_arr1)
+    dataloader_input2 = ffmodel.create_data_loader(input_tensor2, input_arr2)
+
+    ffmodel.init_layers()
+
+    dataloader_input1.reset()
+    dataloader_input2.reset()
+
+    dataloader_input1.next_batch(ffmodel)
+    dataloader_input2.next_batch(ffmodel)
+
+    ffmodel.forward()
+
+    max_output.inline_map(ffmodel, ffconfig)
+    max_result = max_output.get_array(ffmodel, ffconfig)
+
+    return max_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data1 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+    input_data2 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+
+    max_result = test_max(ffconfig, input_data1, input_data2)
+
+    print("Input Array 1:")
+    print(input_data1)
+    print("\nInput Array 2:")
+    print(input_data2)
+    print("\nOutput Array after applying max function:")
+    print(max_result)
diff --git a/examples/python/native/ops/mean.py b/examples/python/native/ops/mean.py
new file mode 100644
index 000000000..df8c3f642
--- /dev/null
+++ b/examples/python/native/ops/mean.py
@@ -0,0 +1,48 @@
+from typing import List
+
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single mean layer, feeds one batch of input_arr through forward() and returns the layer output as a numpy array.
+def test_mean(ffconfig, input_arr: np.ndarray, dims: List[int], keepdims: bool = False) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT)
+
+    mean_output = ffmodel.mean(input_tensor, dims=dims, keepdims=keepdims, name="mean_layer")
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_input.next_batch(ffmodel)
+    ffmodel.forward()
+
+    mean_output.inline_map(ffmodel, ffconfig)
+    mean_result = mean_output.get_array(ffmodel, ffconfig)
+
+    return mean_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+    dims_to_mean = [1, 2]  # Example dimensions to take the mean over
+
+    mean_result = test_mean(ffconfig, input_data, dims=dims_to_mean, keepdims=False)
+
+    print("Input Array:")
+    print(input_data)
+    print(f"\nOutput Array after applying mean function along dimensions {dims_to_mean}:")
+    print(mean_result)
diff --git a/examples/python/native/ops/min.py b/examples/python/native/ops/min.py
new file mode 100644
index 000000000..df81f4f2d
--- /dev/null
+++ b/examples/python/native/ops/min.py
@@ -0,0 +1,54 @@
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single min layer over two inputs, feeds one batch of each through forward() and returns the layer output as a numpy array.
+def test_min(ffconfig, input_arr1: np.ndarray, input_arr2: np.ndarray) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor1 = ffmodel.create_tensor(input_arr1.shape, DataType.DT_FLOAT)
+    input_tensor2 = ffmodel.create_tensor(input_arr2.shape, DataType.DT_FLOAT)
+
+    min_output = ffmodel.min(input_tensor1, input_tensor2, name="min_layer")
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input1 = ffmodel.create_data_loader(input_tensor1, input_arr1)
+    dataloader_input2 = ffmodel.create_data_loader(input_tensor2, input_arr2)
+
+    ffmodel.init_layers()
+
+    dataloader_input1.reset()
+    dataloader_input2.reset()
+
+    dataloader_input1.next_batch(ffmodel)
+    dataloader_input2.next_batch(ffmodel)
+
+    ffmodel.forward()
+
+    min_output.inline_map(ffmodel, ffconfig)
+    min_result = min_output.get_array(ffmodel, ffconfig)
+
+    return min_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data1 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+    input_data2 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+
+    min_result = test_min(ffconfig, input_data1, input_data2)
+
+    print("Input Array 1:")
+    print(input_data1)
+    print("\nInput Array 2:")
+    print(input_data2)
+    print("\nOutput Array after applying min function:")
+    print(min_result)
diff --git a/examples/python/native/ops/multihead_attention.py b/examples/python/native/ops/multihead_attention.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/python/native/ops/multiply.py b/examples/python/native/ops/multiply.py
new file mode 100644
index 000000000..fb4f48915
--- /dev/null
+++ b/examples/python/native/ops/multiply.py
@@ -0,0 +1,45 @@
+# The basis for this test of the 'multiply' operation is generated by ChatGPT using the manually created conv2d.py as a template.
+
+
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+# Builds a model with a single multiply layer over two inputs, feeds one batch of each through forward() and returns the layer output as a numpy array.
+def test_multiply(ffconfig, input_arr1: np.ndarray, input_arr2: np.ndarray) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor1 = ffmodel.create_tensor(input_arr1.shape, DataType.DT_FLOAT)
+    input_tensor2 = ffmodel.create_tensor(input_arr2.shape, DataType.DT_FLOAT)
+
+    out = ffmodel.multiply(input_tensor1, input_tensor2)
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
+    dataloader_input1 = ffmodel.create_data_loader(input_tensor1, input_arr1)
+    dataloader_input2 = ffmodel.create_data_loader(input_tensor2, input_arr2)
+
+    ffmodel.init_layers()
+
+    dataloader_input1.reset()
+    dataloader_input1.next_batch(ffmodel)
+
+    dataloader_input2.reset()
+    dataloader_input2.next_batch(ffmodel)
+
+    ffmodel.forward()
+
+    out.inline_map(ffmodel, ffconfig)
+    return out.get_array(ffmodel, ffconfig)
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input1 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+    input2 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+
+    _ = test_multiply(ffconfig, input1, input2)
diff --git a/examples/python/native/ops/pool2d.py b/examples/python/native/ops/pool2d.py
new file mode 100644
index 000000000..b4dc8b219
--- /dev/null
+++ b/examples/python/native/ops/pool2d.py
@@ -0,0 +1,36 @@
+# AI generated from conv2d example
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single max-pooling pool2d layer, feeds one batch of input_arr through forward() and returns the layer output as a numpy array.
+def test_pool2d(ffconfig, input_arr: np.ndarray) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT)
+
+    out = ffmodel.pool2d(input_tensor, 3, 3, 1, 1, 0, 0, PoolType.POOL_MAX)
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY])
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_input.next_batch(ffmodel)
+    ffmodel.forward()
+
+    out.inline_map(ffmodel, ffconfig)
+    return out.get_array(ffmodel, ffconfig)
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+    _ = test_pool2d(ffconfig, input_data)
\ No newline at end of file
diff --git a/examples/python/native/ops/pow.py b/examples/python/native/ops/pow.py
new file mode 100644
index 000000000..cf5bbebd8
--- /dev/null
+++ b/examples/python/native/ops/pow.py
@@ -0,0 +1,46 @@
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single pow layer, feeds one batch of input_arr through forward() and returns the layer output as a numpy array.
+def test_pow(ffconfig, input_arr: np.ndarray, exponent: float) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT)
+
+    pow_output = ffmodel.pow(input_tensor, exponent, name="pow_layer")
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_input.next_batch(ffmodel)
+    ffmodel.forward()
+
+    pow_output.inline_map(ffmodel, ffconfig)
+    pow_result = pow_output.get_array(ffmodel, ffconfig)
+
+    return pow_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+    exponent_value = 2.0  # Example exponent value
+
+    pow_result = test_pow(ffconfig, input_data, exponent=exponent_value)
+
+    print("Input Array:")
+    print(input_data)
+    print(f"\nOutput Array after applying pow function with exponent {exponent_value}:")
+    print(pow_result)
diff --git a/examples/python/native/ops/reduce_sum.py b/examples/python/native/ops/reduce_sum.py
new file mode 100644
index 000000000..7e7b41b79
--- /dev/null
+++ b/examples/python/native/ops/reduce_sum.py
@@ -0,0 +1,48 @@
+from typing import List
+
+import flexflow.core
+import numpy as np
+from flexflow.core import *
+
+# Builds a model with a single reduce_sum layer, feeds one batch of input_arr through forward() and returns the layer output as a numpy array.
+def test_reduce_sum(ffconfig, input_arr: np.ndarray, axes: List[int], keepdims: bool = False) -> np.ndarray:
+    ffmodel = FFModel(ffconfig)
+
+    input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT)
+
+    reduce_sum_output = ffmodel.reduce_sum(input_tensor, axes=axes, keepdims=keepdims, name="reduce_sum_layer")
+
+    ffoptimizer = SGDOptimizer(ffmodel, 0.001)
+    ffmodel.optimizer = ffoptimizer
+    ffmodel.compile(
+        loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY,
+        metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]
+    )
+
+    dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr)
+
+    ffmodel.init_layers()
+
+    dataloader_input.reset()
+    dataloader_input.next_batch(ffmodel)
+    ffmodel.forward()
+
+    reduce_sum_output.inline_map(ffmodel, ffconfig)
+    reduce_sum_result = reduce_sum_output.get_array(ffmodel, ffconfig)
+
+    return reduce_sum_result
+
+
+if __name__ == '__main__':
+    init_flexflow_runtime()
+    ffconfig = FFConfig()
+
+    input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32)
+    axes_to_reduce = [1, 2]  # Example axes to reduce
+
+    reduce_sum_result = test_reduce_sum(ffconfig, input_data, axes=axes_to_reduce, keepdims=False)
+
+    print("Input Array:")
+    print(input_data)
+    print(f"\nOutput Array after applying reduce_sum along axes {axes_to_reduce}:")
+    print(reduce_sum_result)
diff --git a/examples/python/native/ops/relu.py b/examples/python/native/ops/relu.py
new file mode 100644
index 000000000..d855b2716
--- /dev/null
+++ b/examples/python/native/ops/relu.py
@@ -0,0 +1,46 @@
+# The basis for this test of the 'ReLU' activation function is generated by ChatGPT using the manually created conv2d.py as a template.
+ + +import flexflow.core +import numpy as np +from flexflow.core import * + +def test_relu(ffconfig, input_arr: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + # Apply ReLU activation + out = ffmodel.relu(input_tensor) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + + result = test_relu(ffconfig, input_data) + + print("Input Data:") + print(input_data) + + print("\nResult after ReLU activation:") + print(result) diff --git a/examples/python/native/ops/reshape.py b/examples/python/native/ops/reshape.py new file mode 100644 index 000000000..348d6bd93 --- /dev/null +++ b/examples/python/native/ops/reshape.py @@ -0,0 +1,41 @@ +# The basis for this test of the 'reshape' operation is generated by ChatGPT using the manually created conv2d.py as a template. 
+ +from typing import List + +import flexflow.core +import numpy as np +from flexflow.core import * + +def test_reshape(ffconfig, input_arr: np.ndarray, target_shape: List[int]) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + out = ffmodel.reshape(input_tensor, target_shape) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + target_shape = [ffconfig.batch_size, 500] + + _ = test_reshape(ffconfig, input, target_shape) diff --git a/examples/python/native/ops/residual_layer_norm.py b/examples/python/native/ops/residual_layer_norm.py new file mode 100644 index 000000000..e12f2e53d --- /dev/null +++ b/examples/python/native/ops/residual_layer_norm.py @@ -0,0 +1,93 @@ +from typing import List + +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_residual_layer_norm(ffconfig, input_arr: np.ndarray, residual1_arr: np.ndarray, residual2_arr: np.ndarray, use_two_residuals: bool, axes: List[int], elementwise_affine: bool = True, eps: float = 1e-5, use_bias: bool = True, name=None): + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + residual1_tensor = ffmodel.create_tensor(residual1_arr.shape, DataType.DT_FLOAT) + residual2_tensor = ffmodel.create_tensor(residual2_arr.shape, 
DataType.DT_FLOAT) + + output_tensor, layer_norm_output = ffmodel.residual_layer_norm( + input_tensor, + residual1_tensor, + residual2_tensor if use_two_residuals else None, + use_two_residuals, + axes=axes, + elementwise_affine=elementwise_affine, + eps=eps, + use_bias=use_bias, + name="residual_layer_norm_layer" + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + dataloader_residual1 = ffmodel.create_data_loader(residual1_tensor, residual1_arr) + dataloader_residual2 = ffmodel.create_data_loader(residual2_tensor, residual2_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_residual1.reset() + if use_two_residuals: + dataloader_residual2.reset() + + dataloader_input.next_batch(ffmodel) + dataloader_residual1.next_batch(ffmodel) + if use_two_residuals: + dataloader_residual2.next_batch(ffmodel) + + ffmodel.forward() + + output_tensor.inline_map(ffmodel, ffconfig) + layer_norm_output.inline_map(ffmodel, ffconfig) + output_result = output_tensor.get_array(ffmodel, ffconfig) + layer_norm_result = layer_norm_output.get_array(ffmodel, ffconfig) + + return output_result, layer_norm_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + residual1_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + residual2_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + use_two_residuals_flag = True # Example flag + + axes_to_normalize = [1, 2] # Example axes to normalize + + output_result, layer_norm_result = test_residual_layer_norm( + ffconfig, + input_data, + residual1_data, + residual2_data, + 
use_two_residuals_flag, + axes=axes_to_normalize, + elementwise_affine=True, + eps=1e-5, + use_bias=True + ) + + print("Input Array:") + print(input_data) + print("\nResidual1 Array:") + print(residual1_data) + if use_two_residuals_flag: + print("\nResidual2 Array:") + print(residual2_data) + print(f"\nOutput Array after applying residual_layer_norm along axes {axes_to_normalize} with use_two_residuals={use_two_residuals_flag}:") + print(output_result) + print("\nLayer Norm Result:") + print(layer_norm_result) diff --git a/examples/python/native/ops/residual_rms_norm.py b/examples/python/native/ops/residual_rms_norm.py new file mode 100644 index 000000000..9027dffad --- /dev/null +++ b/examples/python/native/ops/residual_rms_norm.py @@ -0,0 +1,80 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_residual_rms_norm( + ffconfig, + input1_arr: np.ndarray, + input2_arr: np.ndarray, + eps: float, + dim: int, + name=None, +): + ffmodel = FFModel(ffconfig) + + input1_tensor = ffmodel.create_tensor(input1_arr.shape, DataType.DT_FLOAT) + input2_tensor = ffmodel.create_tensor(input2_arr.shape, DataType.DT_FLOAT) + + residual_rms_norm_output1, residual_rms_norm_output2 = ffmodel.residual_rms_norm( + input1_tensor, + input2_tensor, + eps, + dim, + name="residual_rms_norm_layer", + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input1 = ffmodel.create_data_loader(input1_tensor, input1_arr) + dataloader_input2 = ffmodel.create_data_loader(input2_tensor, input2_arr) + + ffmodel.init_layers() + + dataloader_input1.reset() + dataloader_input1.next_batch(ffmodel) + + dataloader_input2.reset() + dataloader_input2.next_batch(ffmodel) + + ffmodel.forward() + + residual_rms_norm_output1.inline_map(ffmodel, ffconfig) + 
output_result1 = residual_rms_norm_output1.get_array(ffmodel, ffconfig) + + residual_rms_norm_output2.inline_map(ffmodel, ffconfig) + output_result2 = residual_rms_norm_output2.get_array(ffmodel, ffconfig) + + return output_result1, output_result2 + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input1_data = np.random.randn(ffconfig.batch_size, 10, 20).astype(np.float32) + input2_data = np.random.randn(ffconfig.batch_size, 10, 20).astype(np.float32) + eps_value = 1e-6 + dim_value = 1 # Example value for dim + + output_result1, output_result2 = test_residual_rms_norm( + ffconfig, + input1_data, + input2_data, + eps=eps_value, + dim=dim_value, + ) + + print("Input Array 1:") + print(input1_data) + print("\nInput Array 2:") + print(input2_data) + print("\nOutput Array 1 after applying residual_rms_norm:") + print(output_result1) + print("\nOutput Array 2 after applying residual_rms_norm:") + print(output_result2) diff --git a/examples/python/native/ops/reverse.py b/examples/python/native/ops/reverse.py new file mode 100644 index 000000000..25394d4b9 --- /dev/null +++ b/examples/python/native/ops/reverse.py @@ -0,0 +1,37 @@ +# The basis for this test of the 'reverse' operation is generated by ChatGPT using the manually created conv2d.py as a template. 
+ + +import flexflow.core +import numpy as np +from flexflow.core import * + +def test_reverse(ffconfig, input_arr: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + out = ffmodel.reverse(input_tensor, axis=2) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + _ = test_reverse(ffconfig, input) diff --git a/examples/python/native/ops/rms_norm.py b/examples/python/native/ops/rms_norm.py new file mode 100644 index 000000000..3983d7f89 --- /dev/null +++ b/examples/python/native/ops/rms_norm.py @@ -0,0 +1,64 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_rms_norm( + ffconfig, + input_arr: np.ndarray, + eps: float, + dim: int, + name=None, +): + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + rms_norm_output = ffmodel.rms_norm( + input_tensor, + eps, + dim, + name="rms_norm_layer", + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_MEAN_SQUARED_ERROR, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY], + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + 
dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + rms_norm_output.inline_map(ffmodel, ffconfig) + output_result = rms_norm_output.get_array(ffmodel, ffconfig) + + return output_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 10, 20).astype(np.float32) + eps_value = 1e-6 + dim_value = 1 # Example value for dim + + output_result = test_rms_norm( + ffconfig, + input_data, + eps=eps_value, + dim=dim_value, + ) + + print("Input Array:") + print(input_data) + print("\nOutput Array after applying rms_norm:") + print(output_result) diff --git a/examples/python/native/ops/rsqrt.py b/examples/python/native/ops/rsqrt.py new file mode 100644 index 000000000..3d9ab6544 --- /dev/null +++ b/examples/python/native/ops/rsqrt.py @@ -0,0 +1,44 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_rsqrt(ffconfig, input_arr: np.ndarray) -> np.ndarray: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + rsqrt_output = ffmodel.rsqrt(input_tensor, name="rsqrt_layer") + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + ffmodel.forward() + + rsqrt_output.inline_map(ffmodel, ffconfig) + rsqrt_result = rsqrt_output.get_array(ffmodel, ffconfig) + + return rsqrt_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + rsqrt_result = test_rsqrt(ffconfig, input_data) + + print("Input 
Array:") + print(input_data) + print("\nOutput Array after applying rsqrt function:") + print(rsqrt_result) diff --git a/examples/python/native/ops/sampling.py b/examples/python/native/ops/sampling.py new file mode 100644 index 000000000..2219f09ef --- /dev/null +++ b/examples/python/native/ops/sampling.py @@ -0,0 +1,55 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_sampling(ffconfig, input_arr: np.ndarray, top_p: float, name=None): + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + sampling_output = ffmodel.sampling( + input_tensor, + top_p, + name="sampling_layer", + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_MEAN_SQUARED_ERROR, + metrics=[MetricsType.METRICS_MEAN_SQUARED_ERROR], + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + sampling_output.inline_map(ffmodel, ffconfig) + output_result = sampling_output.get_array(ffmodel, ffconfig) + + return output_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 10).astype(np.float32) + top_p_value = 0.8 + + output_result = test_sampling( + ffconfig, + input_data, + top_p=top_p_value, + ) + + print("Input Array:") + print(input_data) + print("\nOutput Array after applying sampling:") + print(output_result) diff --git a/examples/python/native/ops/scalar_add.py b/examples/python/native/ops/scalar_add.py new file mode 100644 index 000000000..48a316ea8 --- /dev/null +++ b/examples/python/native/ops/scalar_add.py @@ -0,0 +1,53 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_scalar_add(ffconfig, input_arr: np.ndarray, scalar: float, inplace: bool = True, name=None): + 
ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + scalar_add_output = ffmodel.scalar_add( + input_tensor, + scalar, + inplace=inplace, + name="scalar_add_layer" + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + scalar_add_output.inline_map(ffmodel, ffconfig) + output_result = scalar_add_output.get_array(ffmodel, ffconfig) + + return output_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + scalar_value = 2.0 # Example scalar value + inplace_flag = True # Example inplace flag + + output_result = test_scalar_add(ffconfig, input_data, scalar=scalar_value, inplace=inplace_flag) + + print("Input Array:") + print(input_data) + print(f"\nOutput Array after applying scalar addition with scalar value {scalar_value} (inplace={inplace_flag}):") + print(output_result) diff --git a/examples/python/native/ops/scalar_multiply.py b/examples/python/native/ops/scalar_multiply.py new file mode 100644 index 000000000..ebae5cce0 --- /dev/null +++ b/examples/python/native/ops/scalar_multiply.py @@ -0,0 +1,53 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_scalar_multiply(ffconfig, input_arr: np.ndarray, scalar: float, inplace: bool = True, name=None): + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + scalar_multiply_output = ffmodel.scalar_multiply( + input_tensor, + scalar, + inplace=inplace, + 
name="scalar_multiply_layer" + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + scalar_multiply_output.inline_map(ffmodel, ffconfig) + output_result = scalar_multiply_output.get_array(ffmodel, ffconfig) + + return output_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + scalar_value = 2.0 # Example scalar value + inplace_flag = True # Example inplace flag + + output_result = test_scalar_multiply(ffconfig, input_data, scalar=scalar_value, inplace=inplace_flag) + + print("Input Array:") + print(input_data) + print(f"\nOutput Array after applying scalar multiplication with scalar value {scalar_value} (inplace={inplace_flag}):") + print(output_result) diff --git a/examples/python/native/ops/scalar_sub.py b/examples/python/native/ops/scalar_sub.py new file mode 100644 index 000000000..2dc467b57 --- /dev/null +++ b/examples/python/native/ops/scalar_sub.py @@ -0,0 +1,53 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_scalar_sub(ffconfig, input_arr: np.ndarray, scalar: float, inplace: bool = True, name=None): + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + scalar_sub_output = ffmodel.scalar_sub( + input_tensor, + scalar, + inplace=inplace, + name="scalar_sub_layer" + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + 
metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + scalar_sub_output.inline_map(ffmodel, ffconfig) + output_result = scalar_sub_output.get_array(ffmodel, ffconfig) + + return output_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + scalar_value = 2.0 # Example scalar value + inplace_flag = True # Example inplace flag + + output_result = test_scalar_sub(ffconfig, input_data, scalar=scalar_value, inplace=inplace_flag) + + print("Input Array:") + print(input_data) + print(f"\nOutput Array after applying scalar subtraction with scalar value {scalar_value} (inplace={inplace_flag}):") + print(output_result) diff --git a/examples/python/native/ops/scalar_true_divide.py b/examples/python/native/ops/scalar_true_divide.py new file mode 100644 index 000000000..f1b64df50 --- /dev/null +++ b/examples/python/native/ops/scalar_true_divide.py @@ -0,0 +1,53 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_scalar_true_divide(ffconfig, input_arr: np.ndarray, scalar: float, inplace: bool = True, name=None): + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + scalar_true_divide_output = ffmodel.scalar_true_divide( + input_tensor, + scalar, + inplace=inplace, + name="scalar_true_divide_layer" + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + 
ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + scalar_true_divide_output.inline_map(ffmodel, ffconfig) + output_result = scalar_true_divide_output.get_array(ffmodel, ffconfig) + + return output_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + scalar_value = 2.0 # Example scalar value + inplace_flag = True # Example inplace flag + + output_result = test_scalar_true_divide(ffconfig, input_data, scalar=scalar_value, inplace=inplace_flag) + + print("Input Array:") + print(input_data) + print(f"\nOutput Array after applying scalar true division with scalar value {scalar_value} (inplace={inplace_flag}):") + print(output_result) diff --git a/examples/python/native/ops/sigmoid.py b/examples/python/native/ops/sigmoid.py new file mode 100644 index 000000000..0fbe21df4 --- /dev/null +++ b/examples/python/native/ops/sigmoid.py @@ -0,0 +1,46 @@ +# The basis for this test of the 'Sigmoid' activation function is generated by ChatGPT using the manually created conv2d.py as a template. 
+ + +import flexflow.core +import numpy as np +from flexflow.core import * + +def test_sigmoid(ffconfig, input_arr: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + # Apply Sigmoid activation + out = ffmodel.sigmoid(input_tensor) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + + result = test_sigmoid(ffconfig, input_data) + + print("Input Data:") + print(input_data) + + print("\nResult after Sigmoid activation:") + print(result) diff --git a/examples/python/native/ops/sigmoid_silu_multi.py b/examples/python/native/ops/sigmoid_silu_multi.py new file mode 100644 index 000000000..cecc3e102 --- /dev/null +++ b/examples/python/native/ops/sigmoid_silu_multi.py @@ -0,0 +1,58 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_sigmoid_silu_multi(ffconfig, input1_arr: np.ndarray, input2_arr: np.ndarray, name=None): + ffmodel = FFModel(ffconfig) + + input1_tensor = ffmodel.create_tensor(input1_arr.shape, DataType.DT_FLOAT) + input2_tensor = ffmodel.create_tensor(input2_arr.shape, DataType.DT_FLOAT) + + sigmoid_silu_multi_output = ffmodel.sigmoid_silu_multi( + input1_tensor, + input2_tensor, + name="sigmoid_silu_multi_layer" + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + 
ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input1 = ffmodel.create_data_loader(input1_tensor, input1_arr) + dataloader_input2 = ffmodel.create_data_loader(input2_tensor, input2_arr) + + ffmodel.init_layers() + + dataloader_input1.reset() + dataloader_input2.reset() + + dataloader_input1.next_batch(ffmodel) + dataloader_input2.next_batch(ffmodel) + + ffmodel.forward() + + sigmoid_silu_multi_output.inline_map(ffmodel, ffconfig) + output_result = sigmoid_silu_multi_output.get_array(ffmodel, ffconfig) + + return output_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input1_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + input2_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + + output_result = test_sigmoid_silu_multi(ffconfig, input1_data, input2_data) + + print("Input1 Array:") + print(input1_data) + print("\nInput2 Array:") + print(input2_data) + print("\nOutput Array after applying sigmoid_silu_multi:") + print(output_result) diff --git a/examples/python/native/ops/sin.py b/examples/python/native/ops/sin.py new file mode 100644 index 000000000..4b60a4e1d --- /dev/null +++ b/examples/python/native/ops/sin.py @@ -0,0 +1,44 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_sin(ffconfig, input_arr: np.ndarray) -> np.ndarray: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + sin_output = ffmodel.sin(input_tensor, name="sin_layer") + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = 
ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + ffmodel.forward() + + sin_output.inline_map(ffmodel, ffconfig) + sin_result = sin_output.get_array(ffmodel, ffconfig) + + return sin_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + sin_result = test_sin(ffconfig, input_data) + + print("Input Array:") + print(input_data) + print("\nOutput Array after applying sin function:") + print(sin_result) diff --git a/examples/python/native/ops/softmax.py b/examples/python/native/ops/softmax.py new file mode 100644 index 000000000..b5481bcc8 --- /dev/null +++ b/examples/python/native/ops/softmax.py @@ -0,0 +1,46 @@ +# The basis for this test of the 'Softmax' activation function is generated by ChatGPT using the manually created conv2d.py as a template. + + +import flexflow.core +import numpy as np +from flexflow.core import * + +def test_softmax(ffconfig, input_arr: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + # Apply Softmax activation + out = ffmodel.softmax(input_tensor) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10).astype(np.float32) + + result 
= test_softmax(ffconfig, input_data) + + print("Input Data:") + print(input_data) + + print("\nResult after Softmax activation:") + print(result) diff --git a/examples/python/native/ops/spec_inc_multihead_self_attention.py b/examples/python/native/ops/spec_inc_multihead_self_attention.py new file mode 100644 index 000000000..bd1aaa189 --- /dev/null +++ b/examples/python/native/ops/spec_inc_multihead_self_attention.py @@ -0,0 +1,103 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_spec_inc_multihead_self_attention( + ffconfig, + input_arr: np.ndarray, + embed_dim: int, + num_heads: int, + kdim: int = 0, + vdim: int = 0, + dropout: float = 0.0, + bias: bool = True, + add_bias_kv: bool = False, + add_zero_attn: bool = False, + data_type: DataType = DataType.DT_NONE, + kernel_initializer=None, + apply_rotary_embedding: bool = False, + scaling_query: bool = False, + scaling_factor: float = 1.0, + qk_prod_scaling: bool = True, + position_bias: bool = False, + name=None, +): + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, data_type) + + spec_inc_multihead_self_attention_output = ffmodel.spec_inc_multihead_self_attention( + input_tensor, + embed_dim, + num_heads, + kdim=kdim, + vdim=vdim, + dropout=dropout, + bias=bias, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + data_type=data_type, + kernel_initializer=kernel_initializer, + apply_rotary_embedding=apply_rotary_embedding, + scaling_query=scaling_query, + scaling_factor=scaling_factor, + qk_prod_scaling=qk_prod_scaling, + position_bias=position_bias, + name="spec_inc_multihead_self_attention_layer", + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + 
+ ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + spec_inc_multihead_self_attention_output.inline_map(ffmodel, ffconfig) + output_result = spec_inc_multihead_self_attention_output.get_array(ffmodel, ffconfig) + + return output_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 10, 20).astype(np.float32) + embed_dim_value = 64 + num_heads_value = 8 + + output_result = test_spec_inc_multihead_self_attention( + ffconfig, + input_data, + embed_dim=embed_dim_value, + num_heads=num_heads_value, + kdim=0, # Example value for kdim + vdim=0, # Example value for vdim + dropout=0.1, # Example value for dropout + bias=True, + add_bias_kv=False, + add_zero_attn=False, + data_type=DataType.DT_FLOAT, + kernel_initializer=None, # Example value for kernel_initializer + apply_rotary_embedding=False, + scaling_query=False, + scaling_factor=1.0, + qk_prod_scaling=True, + position_bias=False, + ) + + print("Input Array:") + print(input_data) + print("\nOutput Array after applying spec_inc_multihead_self_attention:") + print(output_result) diff --git a/examples/python/native/ops/spec_inc_multiquery_self_attention.py b/examples/python/native/ops/spec_inc_multiquery_self_attention.py new file mode 100644 index 000000000..0b731c99e --- /dev/null +++ b/examples/python/native/ops/spec_inc_multiquery_self_attention.py @@ -0,0 +1,107 @@ +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_spec_inc_multiquery_self_attention( + ffconfig, + input_arr: np.ndarray, + embed_dim: int, + num_q_heads: int, + num_kv_heads: int, + kdim: int = 0, + vdim: int = 0, + dropout: float = 0.0, + bias: bool = True, + add_bias_kv: bool = False, + add_zero_attn: bool = False, + data_type: DataType = DataType.DT_NONE, + kernel_initializer=None, + apply_rotary_embedding: bool = False, + scaling_query: bool = False, + 
scaling_factor: float = 1.0, + qk_prod_scaling: bool = True, + position_bias: bool = False, + name=None, +): + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, data_type) + + spec_inc_multiquery_self_attention_output = ffmodel.spec_inc_multiquery_self_attention( + input_tensor, + embed_dim, + num_q_heads, + num_kv_heads, + kdim=kdim, + vdim=vdim, + dropout=dropout, + bias=bias, + add_bias_kv=add_bias_kv, + add_zero_attn=add_zero_attn, + data_type=data_type, + kernel_initializer=kernel_initializer, + apply_rotary_embedding=apply_rotary_embedding, + scaling_query=scaling_query, + scaling_factor=scaling_factor, + qk_prod_scaling=qk_prod_scaling, + position_bias=position_bias, + name="spec_inc_multiquery_self_attention_layer", + ) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY] + ) + + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + spec_inc_multiquery_self_attention_output.inline_map(ffmodel, ffconfig) + output_result = spec_inc_multiquery_self_attention_output.get_array(ffmodel, ffconfig) + + return output_result + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 10, 20).astype(np.float32) + embed_dim_value = 64 + num_q_heads_value = 4 + num_kv_heads_value = 4 + + output_result = test_spec_inc_multiquery_self_attention( + ffconfig, + input_data, + embed_dim=embed_dim_value, + num_q_heads=num_q_heads_value, + num_kv_heads=num_kv_heads_value, + kdim=0, # Example value for kdim + vdim=0, # Example value for vdim + dropout=0.1, # Example value for dropout + bias=True, + add_bias_kv=False, + 
add_zero_attn=False, + data_type=DataType.DT_FLOAT, + kernel_initializer=None, # Example value for kernel_initializer + apply_rotary_embedding=False, + scaling_query=False, + scaling_factor=1.0, + qk_prod_scaling=True, + position_bias=False, + ) + + print("Input Array:") + print(input_data) + print("\nOutput Array after applying spec_inc_multiquery_self_attention:") + print(output_result) diff --git a/examples/python/native/ops/split.py b/examples/python/native/ops/split.py new file mode 100644 index 000000000..d03a52a76 --- /dev/null +++ b/examples/python/native/ops/split.py @@ -0,0 +1,47 @@ +# The basis for this test of the 'split' operation is generated by ChatGPT using the manually created conv2d.py as a template. + +from typing import List + +import flexflow.core +import numpy as np +from flexflow.core import * + +def test_split(ffconfig, input_arr: np.ndarray) -> List[flexflow.core.Tensor]: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + out1, out2 = ffmodel.split(input_tensor, 2, axis=1) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + out1.inline_map(ffmodel, ffconfig) + out2.inline_map(ffmodel, ffconfig) + + return [out1.get_array(ffmodel, ffconfig), out2.get_array(ffmodel, ffconfig)] + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input = np.random.randn(ffconfig.batch_size, 10, 10, 10).astype(np.float32) + output_list = test_split(ffconfig, input) + + print("Output Tensor 1:") + print(output_list[0]) + + print("\nOutput Tensor 2:") + print(output_list[1]) diff 
--git a/examples/python/native/ops/subtract.py b/examples/python/native/ops/subtract.py new file mode 100644 index 000000000..5f829cbae --- /dev/null +++ b/examples/python/native/ops/subtract.py @@ -0,0 +1,45 @@ +# The basis for this test of the 'subtract' operation is generated by ChatGPT using the manually created conv2d.py as a template. + + +import flexflow.core +import numpy as np +from flexflow.core import * + +def test_subtract(ffconfig, input_arr1: np.ndarray, input_arr2: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor1 = ffmodel.create_tensor(input_arr1.shape, DataType.DT_FLOAT) + input_tensor2 = ffmodel.create_tensor(input_arr2.shape, DataType.DT_FLOAT) + + out = ffmodel.subtract(input_tensor1, input_tensor2) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input1 = ffmodel.create_data_loader(input_tensor1, input_arr1) + dataloader_input2 = ffmodel.create_data_loader(input_tensor2, input_arr2) + + ffmodel.init_layers() + + dataloader_input1.reset() + dataloader_input1.next_batch(ffmodel) + + dataloader_input2.reset() + dataloader_input2.next_batch(ffmodel) + + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input1 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + input2 = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + + _ = test_subtract(ffconfig, input1, input2) diff --git a/examples/python/native/ops/tanh.py b/examples/python/native/ops/tanh.py new file mode 100644 index 000000000..ba4ba7d6f --- /dev/null +++ b/examples/python/native/ops/tanh.py @@ -0,0 +1,46 @@ +# The basis for this test of the 'tanh' activation function is 
generated by ChatGPT using the manually created conv2d.py as a template. + + +import flexflow.core +import numpy as np +from flexflow.core import * + +def test_tanh(ffconfig, input_arr: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + # Apply tanh activation + out = ffmodel.tanh(input_tensor) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input_data = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + + result = test_tanh(ffconfig, input_data) + + print("Input Data:") + print(input_data) + + print("\nResult after tanh activation:") + print(result) diff --git a/examples/python/native/ops/transpose.py b/examples/python/native/ops/transpose.py new file mode 100644 index 000000000..6f514d660 --- /dev/null +++ b/examples/python/native/ops/transpose.py @@ -0,0 +1,38 @@ +# The basis for this test of the 'transpose' operation is generated by ChatGPT using the manually created conv2d.py as a template. 
+ + +import flexflow.core +import numpy as np +from flexflow.core import * + + +def test_transpose(ffconfig, input_arr: np.ndarray) -> flexflow.core.Tensor: + ffmodel = FFModel(ffconfig) + + input_tensor = ffmodel.create_tensor(input_arr.shape, DataType.DT_FLOAT) + + out = ffmodel.transpose(input_tensor, [ffconfig.batch_size, 10, 5, 10]) + + ffoptimizer = SGDOptimizer(ffmodel, 0.001) + ffmodel.optimizer = ffoptimizer + ffmodel.compile( + loss_type=LossType.LOSS_SPARSE_CATEGORICAL_CROSSENTROPY, + metrics=[MetricsType.METRICS_ACCURACY, MetricsType.METRICS_SPARSE_CATEGORICAL_CROSSENTROPY]) + dataloader_input = ffmodel.create_data_loader(input_tensor, input_arr) + + ffmodel.init_layers() + + dataloader_input.reset() + dataloader_input.next_batch(ffmodel) + ffmodel.forward() + + out.inline_map(ffmodel, ffconfig) + return out.get_array(ffmodel, ffconfig) + + +if __name__ == '__main__': + init_flexflow_runtime() + ffconfig = FFConfig() + + input = np.random.randn(ffconfig.batch_size, 5, 10, 10).astype(np.float32) + _ = test_transpose(ffconfig, input) From a83effedd6e0185a7e8225f445c0aaba840c1aca Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Thu, 20 Jun 2024 04:08:29 +0000 Subject: [PATCH 4/7] add code to keep runners registered --- .github/workflows/docker-build.yml | 41 ++++++++++++++++++++---------- .github/workflows/gpu-ci.yml | 24 +++++++++++++++++ 2 files changed, 52 insertions(+), 13 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index d16179434..eeaab0e0a 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -9,9 +9,9 @@ on: branches: - "inference" - "master" - # schedule: - # # Run every week on Sunday at midnight PT (3am ET / 8am UTC) to keep the docker images updated - # - cron: "0 8 * * 0" + schedule: + # At 00:00 on day-of-month 1, 14, and 28. 
+ - cron: "0 0 1,14,28 * *" workflow_dispatch: # Cancel outdated workflows if they are still running @@ -58,13 +58,28 @@ jobs: - name: Check availability of flexflow modules in Python run: docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${hip_version}:latest -c "python -c 'import flexflow.core; import flexflow.serve as ff; exit()'" - + + keep-runner-registered: + name: Keep runner alive + if: ${{ github.event_name == 'schedule' }} + runs-on: [self-hosted, rocm_builder] + defaults: + run: + shell: bash -l {0} # required to use an activated conda environment + env: + CONDA: "3" + needs: rocm-builder-start + steps: + - name: Keep alive + run: | + echo "Keep self-hosted runner registered with Github" + sleep 10m docker-build-and-publish-rocm: name: Build and Deploy FlexFlow Docker Containers (ROCm backend) needs: rocm-builder-start runs-on: [self-hosted, rocm_builder] - if: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }} + if: ${{ ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }} strategy: matrix: hip_version: ["5.3", "5.4", "5.5", "5.6"] @@ -106,19 +121,19 @@ jobs: cuda_version: ${{ matrix.cuda_version }} steps: - name: Checkout Git Repository - if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }} + if: ${{ ( ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }} uses: actions/checkout@v3 with: submodules: recursive - name: Free additional space on runner - if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }} + if: ${{ ( ( 
github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }} run: .github/workflows/helpers/free_space_on_runner.sh - name: Build Docker container - if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }} + if: ${{ ( ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }} env: - deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }} + deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }} build_needed: ${{ matrix.cuda_version == '12.0' }} run: | # On push to inference, build for all compatible architectures, so that we can publish @@ -133,11 +148,11 @@ jobs: fi - name: Check availability of flexflow modules in Python - if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }} + if: ${{ ( ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }} run: docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${cuda_version}:latest -c "export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH; sudo ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1; python -c 'import flexflow.core; import flexflow.serve as ff; exit()'" - name: Publish Docker environment image (on push to inference) - if: ${{ github.repository_owner == 'flexflow' && ( github.event_name == 'push' || github.event_name == 'schedule' || 
github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }} + if: ${{ github.repository_owner == 'flexflow' && ( github.event_name == 'push' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }} env: FLEXFLOW_CONTAINER_TOKEN: ${{ secrets.FLEXFLOW_CONTAINER_TOKEN }} run: | @@ -145,7 +160,7 @@ jobs: ./docker/publish.sh flexflow rocm-builder-stop: - needs: docker-build-and-publish-rocm + needs: [docker-build-and-publish-rocm, keep-runner-registered] if: ${{ always() && ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }} runs-on: ubuntu-latest name: Stop the AWS instance we used to build the ROCM Docker images @@ -166,7 +181,7 @@ jobs: name: Notify Slack in case of failure runs-on: ubuntu-20.04 needs: [docker-build-cuda, docker-build-and-publish-rocm] - if: ${{ failure() && github.event_name == 'schedule' && github.repository_owner == 'flexflow' }} + if: ${{ failure() && github.event_name == 'workflow_dispatch' && github.repository_owner == 'flexflow' }} steps: - name: Send Slack message env: diff --git a/.github/workflows/gpu-ci.yml b/.github/workflows/gpu-ci.yml index 7bdb6805a..c7d0cd72c 100644 --- a/.github/workflows/gpu-ci.yml +++ b/.github/workflows/gpu-ci.yml @@ -1,5 +1,7 @@ name: "gpu-ci" on: + schedule: + - cron: "0 0 1,14,28 * *" # At 00:00 on day-of-month 1, 14, and 28. 
push: branches: - "inference" @@ -43,8 +45,28 @@ jobs: pip3 install pygithub python3 .github/workflows/helpers/gpu_ci_helper.py + keep-runner-registered: + name: Keep runner alive + if: ${{ github.event_name == 'schedule' }} + runs-on: [self-hosted, gpu] + defaults: + run: + shell: bash -l {0} # required to use an activated conda environment + env: + CONDA: "3" + needs: gpu-ci-concierge + container: + image: ghcr.io/flexflow/flexflow-environment-cuda-11.8:latest + options: --gpus all --shm-size=8192m + steps: + - name: Keep alive + run: | + echo "Keep self-hosted runner registered with Github" + sleep 10m + python-interface-check: name: Check Python Interface + if: ${{ github.event_name != 'schedule' }} runs-on: [self-hosted, gpu] defaults: run: @@ -119,6 +141,7 @@ jobs: inference-tests: name: Inference Tests + if: ${{ github.event_name != 'schedule' }} runs-on: [self-hosted, gpu] defaults: run: @@ -195,6 +218,7 @@ jobs: training-tests: name: Training Tests + if: ${{ github.event_name != 'schedule' }} runs-on: [self-hosted, gpu] # skip this time-consuming test for PRs to the inference branch # if: ${{ github.event_name != 'pull_request' || github.base_ref != 'inference' }} From 4f82aaed6317cef0a2587848a3b6d57f1d709381 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Wed, 10 Jul 2024 23:15:28 -0400 Subject: [PATCH 5/7] fix docker --- docker/flexflow-environment/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/flexflow-environment/Dockerfile b/docker/flexflow-environment/Dockerfile index 6ca337f58..cef619ad6 100644 --- a/docker/flexflow-environment/Dockerfile +++ b/docker/flexflow-environment/Dockerfile @@ -37,6 +37,7 @@ RUN MINICONDA_SCRIPT_NAME=Miniconda3-py311_23.5.2-0-Linux-x86_64.sh; \ chmod +x ~/${MINICONDA_SCRIPT_NAME} && \ bash ~/${MINICONDA_SCRIPT_NAME} -b -p /opt/conda && \ rm ~/${MINICONDA_SCRIPT_NAME} && \ + /opt/conda/bin/conda config --set solver classic && \ /opt/conda/bin/conda upgrade --all && \ /opt/conda/bin/conda install 
conda-build conda-verify && \ /opt/conda/bin/conda clean -ya From 25fb40772f587892510bfe0ca296ae54768ff35c Mon Sep 17 00:00:00 2001 From: Zhihao Jia Date: Thu, 11 Jul 2024 15:16:40 -0400 Subject: [PATCH 6/7] [Tokenizer] update tokenizers-cpp repo --- deps/tokenizers-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/tokenizers-cpp b/deps/tokenizers-cpp index 4f42c9fa7..c0fab1e14 160000 --- a/deps/tokenizers-cpp +++ b/deps/tokenizers-cpp @@ -1 +1 @@ -Subproject commit 4f42c9fa74946d70af86671a3804b6f2433e5dac +Subproject commit c0fab1e14a9421c1501acee5b7703e5dafa60479 From 6a1a1886909fc864aadfb10823077f94fe03b72e Mon Sep 17 00:00:00 2001 From: Zhihao Jia Date: Sat, 3 Aug 2024 08:31:37 -0700 Subject: [PATCH 7/7] minor bug fix (#1456) --- .../ops/kernels/inc_multihead_self_attention_kernels.h | 3 ++- src/ops/attention.cu | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/flexflow/ops/kernels/inc_multihead_self_attention_kernels.h b/include/flexflow/ops/kernels/inc_multihead_self_attention_kernels.h index 9bf2f581e..26dcf1242 100644 --- a/include/flexflow/ops/kernels/inc_multihead_self_attention_kernels.h +++ b/include/flexflow/ops/kernels/inc_multihead_self_attention_kernels.h @@ -56,7 +56,8 @@ __global__ void apply_proj_bias_qkv(DT *input_ptr, int num_heads, int num_kv_heads, bool scaling_query, - float scaling_factor); + float scaling_factor, + int hidden_size); #if defined(FF_USE_CUDA) || defined(FF_USE_HIP_CUDA) template diff --git a/src/ops/attention.cu b/src/ops/attention.cu index 9b8b90da7..18fc810ae 100644 --- a/src/ops/attention.cu +++ b/src/ops/attention.cu @@ -206,7 +206,7 @@ MultiHeadAttentionMeta::MultiHeadAttentionMeta(FFHandler handler, checkCUDNN(cudnnCreateSeqDataDescriptor(&oDesc)); // Currently do not support adding bias to key/value projection assert(!attn->add_bias_kv); - cudnnAttnQueryMap_t attnMode = CUDNN_ATTN_QUERYMAP_ALL_TO_ONE; + unsigned attnMode = CUDNN_ATTN_QUERYMAP_ALL_TO_ONE; // Assume no 
beam search for now int maxBeamSize = 1; // printf("batchSize(%d) qSize(%d) kSize(%d) vSize(%d) qProjSize(%d)