
Commit dec7081

Merge pull request #711 from basetenlabs/bump-version-0.7.14
Release 0.7.14
2 parents ee53336 + 7a73cc4 commit dec7081

24 files changed: +529 −308

bin/generate_truss_examples.py

+24 −12
````diff
@@ -6,6 +6,12 @@
 ```
 $ poetry run python bin/generate_truss_examples.py
 ```
+
+Development:
+
+Run this on a branch of truss-examples repo with:
+
+$ poetry run python bin/generate_truss_examples.py $BRANCH_NAME
 """
 import enum
 import json
@@ -20,6 +26,7 @@
 
 DOC_CONFIGURATION_FILE = "doc.yaml"
 TRUSS_EXAMPLES_REPO = "https://github.com/basetenlabs/truss-examples"
+DEFAULT_BRANCH = "main"
 DESTINATION_DIR = "truss-examples"
 MINT_CONFIG_PATH = "docs/mint.json"
 
````
```diff
@@ -29,7 +36,7 @@ class FileType(enum.Enum):
     PYTHON = "python"
 
 
-def clone_repo():
+def clone_repo(branch: str):
     """
     If the destination directory exists, remove it.
     Then, clone the given repo into the specified directory.
@@ -41,6 +48,7 @@ def clone_repo():
         subprocess.run(
             ["git", "clone", TRUSS_EXAMPLES_REPO, DESTINATION_DIR], check=True
         )
+        subprocess.run(["git", "checkout", branch], cwd=DESTINATION_DIR, check=True)
         print(f"Successfully cloned {TRUSS_EXAMPLES_REPO} to {DESTINATION_DIR}")
     except subprocess.CalledProcessError as e:
         print(f"Error cloning the repo: {e}")
```
```diff
@@ -71,9 +79,9 @@ def _get_example_destination(truss_directory: str) -> Path:
     Get the destination directory for the example.
     """
     original_path = Path(truss_directory)
-    folder, example = original_path.parts[1:]
-    example_file = f"{example}.mdx"
-    return Path("docs/examples") / folder / example_file
+    example_path = "/".join(original_path.parts[1:])
+    example_file_path = f"{example_path}.mdx"
+    return Path("docs/examples") / example_file_path
 
 
 def _get_file_type(file_path: str) -> FileType:
```
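The old code assumed every example sat exactly one folder deep; the new code joins however many path components there are. A standalone sketch of the new mapping, using one of the renamed examples below as input:

```python
from pathlib import Path

def _get_example_destination(truss_directory: str) -> Path:
    # New behavior: join every component under the repo root,
    # so nesting depth no longer matters.
    original_path = Path(truss_directory)
    example_path = "/".join(original_path.parts[1:])
    return Path("docs/examples") / f"{example_path}.mdx"

print(_get_example_destination("truss-examples/01-getting-started-bert"))
# -> docs/examples/01-getting-started-bert.mdx
```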
```diff
@@ -260,7 +268,9 @@ def update_toc(example_dirs: List[str]):
     """
 
     # Exclude the root directory ("truss_examples") from the path
-    transformed_example_paths = [Path(example).parts[1:] for example in example_dirs]
+    transformed_example_paths = [
+        "/".join(Path(example).parts[1:]) for example in example_dirs
+    ]
 
     mint_config = json.loads(fetch_file_contents(MINT_CONFIG_PATH))
     navigation = mint_config["navigation"]
@@ -269,24 +279,21 @@
 
     # Sort examples by the group name
     examples_section["pages"] = [
-        f"examples/{example_path[0]}/{example_path[1]}"
-        for example_path in sorted(
-            transformed_example_paths, key=lambda example: example[0]
-        )
+        f"examples/{example_path}" for example_path in sorted(transformed_example_paths)
     ]
 
     serialized_mint_config = json.dumps(mint_config, indent=2)
     Path(MINT_CONFIG_PATH).write_text(serialized_mint_config)
 
 
-def generate_truss_examples():
+def generate_truss_examples(branch: str = DEFAULT_BRANCH):
     """
     Walk through the Truss examples repo, and for each
     of the examples in the repo, generate documentation.
 
     Finish the process by updating the table of contents.
     """
-    clone_repo()
+    clone_repo(branch)
 
     example_dirs = _fetch_example_dirs(DESTINATION_DIR)
     for truss_directory in example_dirs:
```
```diff
@@ -296,4 +303,9 @@ def generate_truss_examples():
 
 
 if __name__ == "__main__":
-    generate_truss_examples()
+    # The first arg is optionally the branch name
+    # of truss-examples repo to use.
+    if len(sys.argv) > 1:
+        generate_truss_examples(sys.argv[1])
+    else:
+        generate_truss_examples()
```
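With these changes, the docs can be regenerated against a work-in-progress branch of truss-examples, per the updated docstring (branch name illustrative):

```sh
$ poetry run python bin/generate_truss_examples.py my-wip-branch
```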

docs/_snippets/config-params.mdx

+209 −22
````diff
@@ -1,7 +1,35 @@
-<ParamField body="description" type="str">
+### `model_name`
+
+Name of your model.
+### `description`
+
 Describe your model for documentation purposes.
-</ParamField>
-<ParamField body="environment_variables" type="Dict[str, str]">
+### `model_class_name`
+(default: `Model`)
+
+The name of the class that defines your Truss model. Note that this class must implement
+at least a `predict` method.
+### `model_module_dir`
+(default: `model`)
+
+Folder in the Truss where the model class is found.
+### `data_dir`
+(default: `data/`)
+
+Folder where data files are placed in your Truss. Note that you can access this within your model like so:
+
+```python model/model.py
+class Model:
+    def __init__(self, **kwargs):
+        data_dir = kwargs["data_dir"]
+
+        ...
+```
+
+### `environment_variables`
+
 <Warning>
 Do not store secret values directly in environment variables (or anywhere in the config file). See the `secrets` arg for information on properly managing secrets.
 </Warning>
````
````diff
@@ -13,19 +41,19 @@ environment_variables:
   ENVIRONMENT: Staging
   DB_URL: https://my_database.example.com/
 ```
-</ParamField>
-<ParamField body="model_metadata" type="Dict[str, str]">
+
+### `model_metadata`
 Set any additional metadata in this catch-all field. The entire contents of the config file are available to the model at runtime, so this is a good place to store any custom information that the model needs. For example, scikit-learn models include a flag here that indicates whether the model supports returning probabilities alongside predictions.
 
 ```yaml
 model_metadata:
   supports_predict_proba: true
 ```
-</ParamField>
-<ParamField body="model_name" type="str">
-The model's name, for documentation purposes.
-</ParamField>
-<ParamField body="requirements" type="List[str]">
+
+This is also where display metadata can be stored.
+
+### `requirements`
+
 List the Python dependencies that the model depends on. The requirements should be provided in the [pip requirements file format](https://pip.pypa.io/en/stable/reference/requirements-file-format/), but as a yaml list.
 
 We strongly recommend pinning versions in your requirements.
````
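As a sketch of the display metadata mentioned above, a hypothetical `model_metadata` entry; the keys are illustrative assumptions, since the field is a free-form mapping:

```yaml
model_metadata:
  # Hypothetical display fields, not a fixed schema.
  avatar_url: https://example.com/avatar.png
  example_model_input: {"prompt": "What is a Truss?"}
```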
````diff
@@ -38,19 +66,46 @@ requirements:
 - numpy==1.20.3
 - scipy==1.7.3
 ```
-</ParamField>
-<ParamField body="resources" type="Dict[str, str]">
-Specify model server runtime resources such as CPU, RAM and GPU.
+
+### `resources`
+
+The `resources` section is where you specify the compute resources that your model needs. This includes CPU, memory, and GPU resources.
+If you need a GPU, you must also set `resources.use_gpu` to `true`.
+
+#### `resources.cpu`
+
+CPU resources needed, expressed as either a raw number or in "millicpus". For example, `1000m` and `1` are equivalent.
+Fractional CPU amounts can be requested using millicpus. For example, `500m` is half of a CPU core.
+
+#### `resources.memory`
+
+CPU RAM needed, expressed as a number with units. Acceptable units include "Gi" (Gibibytes), "G" (Gigabytes), "Mi" (Mebibytes), and "M" (Megabytes). For example, `1Gi` and `1024Mi` are equivalent.
+
+#### `resources.use_gpu`
+
+Whether or not a GPU is required for this model.
+
+#### `resources.accelerator`
+
+Which GPU you would like for your instance. Available Nvidia GPUs supported in Truss include:
+* T4
+* L4
+* A10G
+* V100
+* A100
+
+Note that if you need multiple GPUs to serve your model, you can use the `:` operator to request multiple
+GPUs on your instance, e.g.:
 
 ```yaml
 resources:
-  cpu: "3"
-  memory: 14Gi
-  use_gpu: true
-  accelerator: A10G
+  ...
+  accelerator: A10G:2 # Requests 2 A10Gs
+
 ```
-</ParamField>
-<ParamField body="secrets" type="Dict[str, str]">
+
+
+### `secrets`
 <Warning>
 This field can be used to specify the keys for such secrets and dummy default
 values. ***Never store actual secret values in the config***. Dummy default
````
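For reference, a complete `resources` block combining the fields documented in this hunk; the values are illustrative, drawn from the example the commit removed:

```yaml
resources:
  cpu: "3"          # three CPU cores; 3000m would be equivalent
  memory: 14Gi      # 14 Gibibytes of RAM
  use_gpu: true     # required whenever an accelerator is requested
  accelerator: A10G # or e.g. A10G:2 for two GPUs
```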
````diff
@@ -66,8 +121,8 @@ information from s3 and may need access to AWS credentials for that.
 secrets:
   hf_access_token: "ACCESS TOKEN"
 ```
-</ParamField>
-<ParamField body="system_packages" type="List[str]">
+
+### `system_packages`
 Specify any system packages that you would typically install using `apt` on a Debian operating system.
 
 ```yaml
````
````diff
@@ -76,4 +131,136 @@ system_packages:
 - libsm6
 - libxext6
 ```
-</ParamField>
+
+### `python_version`
+
+Which version of Python you'd like to use. Supported versions include:
+
+* py39
+* py310
+* py311
+
+### `base_image`
+
+The `base_image` option is used if you need to bring your own custom base image.
+Custom base images are useful if there are scripts that need to run at build time, or dependencies
+that are complicated to install. After creating a custom base image, you can specify it
+in this field.
+
+See [Custom Base Images](guides/base-images) for more detail on how to use these.
+
+#### `base_image.image`
+
+A path to the docker image you'd like to use, as
+an example, `nvcr.io/nvidia/nemo:23.03`.
+
+#### `base_image.python_executable_path`
+
+A path to the Python executable on the image. For instance, `/usr/bin/python`.
+
+Tying it together, a custom base image configuration might look
+like this:
+
+```yaml
+base_image:
+  image: nvcr.io/nvidia/nemo:23.03
+  python_executable_path: /usr/bin/python
+```
+### `runtime`
+
+Runtime settings for your model instance.
+
+#### `runtime.predict_concurrency`
+(default: `1`)
+
+This field governs how much concurrency can run in the predict method of your model. This is useful
+if you have a model that has support for parallelism, and you'd like to take advantage of that.
+By default, this value is set to 1, implying that `predict` can only run for one request at a time.
+This protects the GPU from being over-utilized, and is a good default for many models.
+
+See [How to configure concurrency](guides/concurrency) for more detail on how to set this value.
+### `external_data`
+
+Use `external_data` if you have data that you want to be bundled in your image at build time.
+This is useful if you have a large amount of data that you want to be available to your model.
+By including it at build time, you reduce the cold-start time of your instance, as the data is
+already available in the image. You can use it like so:
+
+```yaml config.yaml
+external_data:
+- url: https://my-bucket.s3.amazonaws.com/my-data.tar.gz
+  local_data_path: data/my-data.tar.gz
+  name: my-data
+```
+#### `external_data.<list_item>.url`
+
+The URL to download data from.
+#### `external_data.<list_item>.local_data_path`
+
+The path on the image where the data will be downloaded to.
+#### `external_data.<list_item>.name`
+
+You can set a name for the data, which is useful for readability purposes.
+Not required.
+### `build`
+
+The `build` section is used to define options for custom servers.
+The two main model servers we support are `TGI` and `vLLM`. These are
+highly optimized servers that are built to support specific LLMs.
+
+See the following examples for how to use each of these:
+* [TGI](examples/07-high-performance-tgi)
+* [vLLM](examples/08-high-performance-vllm)
+
+Example configuration for TGI, running Falcon-7B:
+
+```yaml config.yaml
+build:
+  arguments:
+    endpoint: generate_stream
+    model_id: tiiuae/falcon-7b
+  model_server: TGI
+```
+
+#### `build.model_server`
+
+Either `VLLM` for vLLM, or `TGI` for TGI.
+
+#### `build.arguments`
+
+The arguments for the model server. This includes information such as which model you intend to load, and
+which endpoint from the server you'd like to use.
+
+### `hf_cache`
+
+The `hf_cache` section is used for caching model weights at build time. This is one of the biggest levers
+for decreasing cold start times, as downloading weights can be one of the lengthiest parts of starting a new
+model instance. Using this section ensures that model weights are cached at _build_ time.
+
+See the [model cache guide](guides/model-cache) for the full details on how to use this field.
+
+<Note>
+Despite the fact that this field is called the `hf_cache`, there are multiple backends supported, not just Hugging Face. You can
+also cache weights stored on GCS, for instance.
+</Note>
+
+#### `hf_cache.<list_item>.repo_id`
+
+The endpoint for your cloud bucket. Currently, we support Hugging Face and Google Cloud Storage.
+
+Example: `madebyollin/sdxl-vae-fp16-fix` for a Hugging Face repo, or `gcs://path-to-my-bucket` for
+a GCS bucket.
+
+#### `hf_cache.<list_item>.revision`
+
+Points to the revision you want to pull. By default, it refers to `main`.
+
+#### `hf_cache.<list_item>.allow_patterns`
+
+Only cache files that match specified patterns. Utilize Unix shell-style wildcards to denote these patterns.
+By default, all paths are included.
+
+#### `hf_cache.<list_item>.ignore_patterns`
+
+Conversely, you can also denote file patterns to ignore, hence streamlining the caching process.
+By default, nothing is ignored.
````
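Pulling the `hf_cache` fields together, a sketch of a cached-weights entry; the `repo_id` comes from the example above, while the pattern values are illustrative assumptions:

```yaml config.yaml
hf_cache:
- repo_id: madebyollin/sdxl-vae-fp16-fix
  revision: main
  allow_patterns:  # illustrative: cache only weight files
  - "*.safetensors"
  ignore_patterns: # illustrative: skip markdown docs
  - "*.md"
```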

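And a minimal sketch combining `python_version` with the `runtime` settings documented above (values illustrative):

```yaml config.yaml
python_version: py311
runtime:
  predict_concurrency: 4 # illustrative: allow four concurrent predict calls
```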
docs/examples/1_introduction/getting-started-bert.mdx → docs/examples/01-getting-started-bert.mdx

+1 −1
```diff
@@ -6,7 +6,7 @@ description: "Building your first Truss"
 
 <Card
   title="View on Github"
-  icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/1_introduction/getting-started-bert">
+  icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/01-getting-started-bert">
 </Card>
 
 In this example, we go through building your first Truss model. We'll be using the HuggingFace transformers
```

docs/examples/3_LLMs/llm.mdx → docs/examples/02-llm.mdx

+1 −1
```diff
@@ -6,7 +6,7 @@ description: "Building an LLM"
 
 <Card
   title="View on Github"
-  icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/3_LLMs/llm">
+  icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/02-llm">
 </Card>
 
 In this example, we go through a Truss that serves an LLM. We
```

docs/examples/3_LLMs/llm-with-streaming.mdx → docs/examples/03-llm-with-streaming.mdx

+1 −1
```diff
@@ -6,7 +6,7 @@ description: "Building an LLM with streaming output"
 
 <Card
   title="View on Github"
-  icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/3_LLMs/llm-with-streaming">
+  icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/03-llm-with-streaming">
 </Card>
 
 In this example, we go through a Truss that serves an LLM, and streams the output to the client.
```
