|
| 1 | +""" |
| 2 | +Script to take the Truss examples in https://github.com/basetenlabs/truss-examples, |
| 3 | +and generate documentation. |
| 4 | +
|
| 5 | +Usage: |
| 6 | +``` |
| 7 | +$ poetry run python bin/generate_truss_examples.py |
| 8 | +``` |
| 9 | +""" |
| 10 | +import enum |
| 11 | +import json |
| 12 | +import os |
| 13 | +import shutil |
| 14 | +import subprocess |
| 15 | +import sys |
| 16 | +from pathlib import Path |
| 17 | +from typing import List, Optional, Tuple |
| 18 | + |
| 19 | +import yaml |
| 20 | + |
# Name of the per-example configuration file. A directory in the cloned repo
# is treated as an example only if it contains a file with this name.
DOC_CONFIGURATION_FILE = "doc.yaml"
# Repository that is cloned; also used as the base of the "View on Github" links.
TRUSS_EXAMPLES_REPO = "https://github.com/basetenlabs/truss-examples"
# Local directory the repository is cloned into. It is deleted and re-created
# on every run by clone_repo().
DESTINATION_DIR = "truss-examples"
# JSON navigation config whose "Examples" group is rewritten by update_toc().
MINT_CONFIG_PATH = "docs/mint.json"
| 25 | + |
| 26 | + |
@enum.unique
class FileType(enum.Enum):
    """
    Kinds of source file this script can turn into documentation.

    The value of each member is the language tag written after the opening
    code fence of the generated code blocks.
    """

    YAML = "yaml"
    PYTHON = "python"
| 30 | + |
| 31 | + |
def clone_repo():
    """
    Produce a fresh checkout of the examples repo in DESTINATION_DIR.

    Any existing DESTINATION_DIR is removed first. If `git clone` exits with
    a non-zero status, the error is printed and the script exits with code 1.
    """
    destination = Path(DESTINATION_DIR)
    if destination.exists():
        shutil.rmtree(DESTINATION_DIR)

    clone_command = ["git", "clone", TRUSS_EXAMPLES_REPO, DESTINATION_DIR]
    try:
        subprocess.run(clone_command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error cloning the repo: {e}")
        sys.exit(1)
    else:
        print(f"Successfully cloned {TRUSS_EXAMPLES_REPO} to {DESTINATION_DIR}")
| 48 | + |
| 49 | + |
def fetch_file_contents(path: str) -> str:
    """
    Return the entire text content of the file at `path`.

    The file is always decoded as UTF-8 rather than with the platform's
    locale encoding, so the generated documentation does not depend on the
    machine the script runs on. `path` may be a string or a path-like object.
    """
    with open(path, encoding="utf-8") as f:
        return f.read()
| 53 | + |
| 54 | + |
def _fetch_example_dirs(root_dir: str) -> List[str]:
    """
    Collect every directory at or below `root_dir` that directly contains
    a DOC_CONFIGURATION_FILE.

    Directories are returned in the order os.walk visits them.
    """
    return [
        current_dir
        for current_dir, _subdirs, file_names in os.walk(root_dir)
        if DOC_CONFIGURATION_FILE in file_names
    ]
| 67 | + |
| 68 | + |
| 69 | +def _get_example_destination(truss_directory: str) -> Path: |
| 70 | + """ |
| 71 | + Get the destination directory for the example. |
| 72 | + """ |
| 73 | + original_path = Path(truss_directory) |
| 74 | + folder, example = original_path.parts[1:] |
| 75 | + example_file = f"{example}.mdx" |
| 76 | + return Path("docs/examples") / folder / example_file |
| 77 | + |
| 78 | + |
def _get_file_type(file_path: str) -> FileType:
    """
    Map a file path to its FileType based on the file extension.

    Both ".yaml" and ".yml" are recognised as YAML, and the comparison is
    case-insensitive.

    Raises:
        ValueError: if the extension is neither YAML nor Python.
    """
    extension = Path(file_path).suffix
    normalized_extension = extension.lower()

    if normalized_extension in (".yaml", ".yml"):
        return FileType.YAML

    if normalized_extension == ".py":
        return FileType.PYTHON

    raise ValueError(f"Unknown file type: {extension}")
| 88 | + |
| 89 | + |
class ContentBlock:
    """
    Base class for one piece of a generated mdx page.

    Subclasses override formatted_content() to render themselves.
    """

    def formatted_content(self) -> str:
        """
        Return this block rendered as mdx text.
        """
        raise NotImplementedError()
| 93 | + |
| 94 | + |
class CodeBlock(ContentBlock):
    """
    A run of consecutive code lines taken from a single source file.
    """

    def __init__(self, file_type: FileType, file_path: str):
        self.file_type = file_type
        self.file_path = file_path
        # Starts empty; the code text is appended to this attribute after
        # construction.
        self.content = ""

    def formatted_content(self) -> str:
        """
        Render the block as a fenced code block preceded by a newline, with
        the language and file path on the opening fence:

        ```python main.py
        def main():
            ...
        ```
        """
        language = self.file_type.value
        title = self.file_path
        body = self.content
        return f"\n```{language} {title}\n{body}```"
| 111 | + |
| 112 | + |
class MarkdownBlock(ContentBlock):
    """
    A single comment line from a source file, rendered as markdown text.
    """

    def __init__(self, content: str):
        # The raw source line, including the comment marker and indentation.
        self.content = content

    def formatted_content(self) -> str:
        """
        Return the comment text without its marker.

        Surrounding whitespace is stripped, then one leading "#" and at most
        one space after it are removed, so "# Hello" becomes "Hello" and
        "# # Title" becomes "# Title". A comment written without the space
        ("#Hello") keeps all of its text instead of losing its first letter.
        """
        text = self.content.strip()
        if text.startswith("#"):
            text = text[1:]
        # Only a single separating space belongs to the comment marker; any
        # further leading whitespace is part of the markdown itself.
        if text.startswith(" "):
            text = text[1:]
        return text
| 121 | + |
| 122 | + |
class MarkdownExtractor:
    """
    Class that supports ingesting a code file line-by-line, and produces a formatted
    mdx file.

    Comment lines become markdown text, and each run of consecutive
    non-comment lines becomes one fenced code block.
    """

    def __init__(self, file_type: FileType, file_path: str):
        self.file_type = file_type
        self.file_path = file_path

        # All blocks, in the order they were encountered in the file.
        self.blocks: List[ContentBlock] = []
        # The code block currently being extended, or None when the previous
        # line was a comment (or nothing has been ingested yet).
        self.current_code_block: Optional[CodeBlock] = None

    def ingest(self, line: str):
        """
        Add one source line (without its trailing newline) to the document.

        A line whose first non-whitespace character is "#" is a comment: it
        ends the current code block and is stored as a MarkdownBlock.

        Any other line, including a blank one, is appended to the current
        code block; a new code block is started if there is none.
        """
        if line.strip().startswith("#"):
            self.current_code_block = None
            self.blocks.append(MarkdownBlock(line))
            return

        if self.current_code_block is None:
            self.current_code_block = CodeBlock(self.file_type, self.file_path)
            self.blocks.append(self.current_code_block)
        self.current_code_block.content += line + "\n"

    def _formatted_request_example(self) -> str:
        """
        A key part of the mdx file is that each has a <RequestExample> block at the
        bottom of the file. This generates the fenced code for that block by
        concatenating the content of every CodeBlock, in order.
        """
        code_content = "".join(
            block.content for block in self.blocks if isinstance(block, CodeBlock)
        )

        return f"""```{self.file_type.value} {self.file_path}\n{code_content}```"""

    @staticmethod
    def _render_block(block: ContentBlock) -> str:
        """
        Render one block for the page body.

        A blank line between two comment lines is ingested as a code block
        that holds nothing but whitespace. Rendering it as a fence would put
        an empty code block in the page, so it is emitted as an empty line
        instead, which keeps the paragraph break between the comments.
        """
        if isinstance(block, CodeBlock) and not block.content.strip():
            return ""
        return block.formatted_content()

    def mdx_content(self) -> Tuple[str, str]:
        """
        Return a tuple of (page body, request example code).

        The page body is every block rendered in order, joined by newlines
        and terminated by a newline. The request example still contains the
        full code content, including whitespace-only code blocks.
        """
        full_content = "\n".join(self._render_block(block) for block in self.blocks)

        return (
            full_content + "\n",
            self._formatted_request_example(),
        )
| 176 | + |
| 177 | + |
def _extract_mdx_content_and_code(full_file_path: str, path: str) -> Tuple[str, str]:
    """
    Read the file at `full_file_path` and convert it to mdx.

    `path` is used to determine the file type and is the file path passed
    to the extractor. Returns the tuple produced by
    MarkdownExtractor.mdx_content().
    """
    # Read the file before resolving its type, so a missing file is reported
    # ahead of an unsupported extension.
    source_lines = fetch_file_contents(full_file_path).splitlines()

    extractor = MarkdownExtractor(_get_file_type(path), path)
    for source_line in source_lines:
        extractor.ingest(source_line)

    return extractor.mdx_content()
| 186 | + |
| 187 | + |
| 188 | +def _generate_request_example_block(code: str): |
| 189 | + return f""" |
| 190 | +<RequestExample> |
| 191 | +{code} |
| 192 | +</RequestExample> |
| 193 | +""" |
| 194 | + |
| 195 | + |
def _generate_truss_example(truss_directory: str):
    """
    Generate the mdx documentation page for one example directory.

    The directory's DOC_CONFIGURATION_FILE supplies the page title, the
    description, and the list of files to render. The page consists of the
    front matter, a card linking to the example on GitHub, the rendered
    content of every listed file, and a <RequestExample> block holding the
    code of all files. It is written to the path returned by
    _get_example_destination(), creating parent directories as needed.
    """
    print("Generating example for: ", truss_directory)
    doc_information = yaml.safe_load(
        fetch_file_contents(f"{truss_directory}/{DOC_CONFIGURATION_FILE}")
    )

    example_destination = _get_example_destination(truss_directory)

    # json.dumps produces a double-quoted string with quotes and backslashes
    # escaped, which is also a valid YAML double-quoted scalar. Interpolating
    # the raw text would break the front matter for a title containing '"'.
    title = json.dumps(str(doc_information["title"]), ensure_ascii=False)
    description = json.dumps(str(doc_information["description"]), ensure_ascii=False)
    header = f"""---
title: {title}
description: {description}
---
"""

    path_in_examples_repo = "/".join(Path(truss_directory).parts[1:])
    link_to_github = f"""
    <Card
        title="View on Github"
        icon="github" href="{TRUSS_EXAMPLES_REPO}/tree/main/{path_in_examples_repo}">
    </Card>
    """
    # An empty or missing-valued "files" entry yields a page with no file
    # content instead of failing to unpack an empty zip().
    files_to_scrape = doc_information["files"] or []

    full_content = []
    code_blocks = []
    for file in files_to_scrape:
        content, code = _extract_mdx_content_and_code(
            Path(truss_directory) / file, file
        )
        full_content.append(content)
        code_blocks.append(code)

    full_code_block = "\n".join(code_blocks)
    file_content = "\n".join(full_content) + _generate_request_example_block(
        full_code_block
    )
    example_content = f"""{header}\n{link_to_github}\n{file_content}"""

    example_destination.parent.mkdir(parents=True, exist_ok=True)
    # Write UTF-8 explicitly so the output does not depend on the locale.
    example_destination.write_text(example_content, encoding="utf-8")
| 235 | + |
| 236 | + |
| 237 | +def _format_group_name(group_name: str) -> str: |
| 238 | + """ |
| 239 | + This function takes the parent directory name in, and converts it |
| 240 | + into a more human readable format for the table of contents. |
| 241 | +
|
| 242 | + Note that parent directory names are assumed to be in the format: |
| 243 | + * 1_introduction/... (becomes "Introduction") |
| 244 | + * 2_image_classification/... (becomes "Image classification") |
| 245 | + * 3_llms/... (becomes "LLMs") |
| 246 | + """ |
| 247 | + lowercase_name = " ".join(group_name.split("_")[1:]) |
| 248 | + # Capitalize the first letter. We do this rather than |
| 249 | + # use .capitalize() or .title() because we want to preserve |
| 250 | + # the case of subsequent letters |
| 251 | + return lowercase_name[0].upper() + lowercase_name[1:] |
| 252 | + |
| 253 | + |
def update_toc(example_dirs: List[str]):
    """
    Update the table of contents in the navigation config (MINT_CONFIG_PATH).

    The "pages" list of the navigation group named "Examples" is replaced by
    one "examples/<folder>/<example>" entry per example directory, sorted by
    folder and then by example name so the output does not depend on the
    order in which the directories were discovered.

    Parameters:
      example_dirs: List of directories as strings in the form
        "<root>/<folder>/<example>", e.g. "truss-examples/1_introduction/..."

    Raises:
      ValueError: if the navigation config has no "Examples" group.
    """

    # Exclude the root directory (the clone destination) from the path
    transformed_example_paths = [Path(example).parts[1:] for example in example_dirs]

    mint_config = json.loads(fetch_file_contents(MINT_CONFIG_PATH))
    navigation = mint_config["navigation"]

    examples_section = next(
        (item for item in navigation if item.get("group") == "Examples"), None
    )
    if examples_section is None:
        raise ValueError(f'No "Examples" group found in {MINT_CONFIG_PATH}')

    # Tuples sort by folder first and example name second.
    examples_section["pages"] = [
        f"examples/{example_path[0]}/{example_path[1]}"
        for example_path in sorted(transformed_example_paths)
    ]

    serialized_mint_config = json.dumps(mint_config, indent=2)
    Path(MINT_CONFIG_PATH).write_text(serialized_mint_config, encoding="utf-8")
| 280 | + |
| 281 | + |
def generate_truss_examples():
    """
    Entry point of the script.

    Clones the Truss examples repo, generates a documentation page for
    every example directory found in the clone, and finally rewrites the
    table of contents to list those examples.
    """
    clone_repo()

    discovered_examples = _fetch_example_dirs(DESTINATION_DIR)
    for example_dir in discovered_examples:
        _generate_truss_example(example_dir)

    update_toc(discovered_examples)
| 296 | + |
| 297 | + |
# Run the generator only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    generate_truss_examples()
0 commit comments