diff --git a/docs/ramalama-serve.1.md b/docs/ramalama-serve.1.md
index 099b9bee..82009d2f 100644
--- a/docs/ramalama-serve.1.md
+++ b/docs/ramalama-serve.1.md
@@ -68,7 +68,8 @@ Generate specified configuration format for running the AI Model as a service
 show this help message and exit
 
 #### **--host**="0.0.0.0"
-IP address for llama.cpp to listen on.
+IP address for llama.cpp to listen on. Only supported when llama.cpp is running
+without a container.
 
 #### **--name**, **-n**
 Name of the container to run the Model in.
diff --git a/docs/ramalama.conf b/docs/ramalama.conf
index 072bcedb..2ba93c8e 100644
--- a/docs/ramalama.conf
+++ b/docs/ramalama.conf
@@ -40,9 +40,10 @@
 #
 #image = "quay.io/ramalama/ramalama:latest"
 
-# IP address for llama.cpp to listen on.
+# IP address for llama.cpp to listen on. Only supported when llama.cpp is
+# running without a container.
 #
-#host = "0.0.0.0"
+#host = "127.0.0.1"
 
 # Pass `--group-add keep-groups` to podman, when using podman.
 # In some cases this is needed to access the gpu from a rootless container
diff --git a/docs/ramalama.conf.5.md b/docs/ramalama.conf.5.md
index 96407d3b..37e5d950 100644
--- a/docs/ramalama.conf.5.md
+++ b/docs/ramalama.conf.5.md
@@ -78,9 +78,10 @@ Run RamaLama using the specified container engine.
 Valid options are: Podman and Docker
 This field can be overridden by the RAMALAMA_CONTAINER_ENGINE environment variable.
 
-**host**="0.0.0.0"
+**host**="127.0.0.1"
 
-IP address for llama.cpp to listen on.
+IP address for llama.cpp to listen on. Only supported when llama.cpp is running
+without a container.
 
 **image**="quay.io/ramalama/ramalama:latest"
 
diff --git a/ramalama/config.py b/ramalama/config.py
index 09ad22ce..c5a68797 100644
--- a/ramalama/config.py
+++ b/ramalama/config.py
@@ -70,7 +70,7 @@ def load_config_defaults(config: Dict[str, Any]):
     config['ctx_size'] = config.get('ctx_size', 2048)
     config['pull'] = config.get('pull', "newer")
     config['temp'] = config.get('temp', "0.8")
-    config['host'] = config.get('host', "0.0.0.0")
+    config['host'] = config.get('host', "127.0.0.1")
     config['port'] = config.get('port', "8080")
 
 
diff --git a/ramalama/model.py b/ramalama/model.py
index 2a2b981e..2975126b 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -527,7 +527,8 @@ def handle_runtime(self, args, exec_args, exec_model_path):
         gpu_args = self.gpu_args(args=args)
         if gpu_args is not None:
             exec_args.extend(gpu_args)
-        exec_args.extend(["--host", args.host])
+        if not args.container:
+            exec_args.extend(["--host", args.host])
         return exec_args
 
     def generate_container_config(self, model_path, args, exec_args):
diff --git a/test/system/040-serve.bats b/test/system/040-serve.bats
index aadaf5bc..17931ca8 100755
--- a/test/system/040-serve.bats
+++ b/test/system/040-serve.bats
@@ -17,12 +17,11 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
     run_ramalama --dryrun serve --name foobar ${model}
     is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
     assert "$output" !~ ".*--network" "--network is not part of the output"
-    assert "$output" =~ ".*--host 0.0.0.0" "verify host 0.0.0.0 is added when run within container"
     is "$output" ".*${model}" "verify model name"
+    assert "$output" !~ ".*--host" "verify --host not added when run within container"
     assert "$output" !~ ".*--seed" "assert seed does not show by default"
 
     run_ramalama --dryrun serve --network bridge --host 127.1.2.3 --name foobar ${model}
-    assert "$output" =~ "--network bridge.*--host 127.1.2.3" "verify --host is modified when run within container"
     is "$output" ".*${model}" "verify model name"
     is "$output" ".*--temp 0.8" "verify temp is set"
 
@@ -53,9 +52,9 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
         run_ramalama stop --all
     else
         run_ramalama --dryrun serve ${model}
-        assert "$output" =~ ".*--host 0.0.0.0" "Outside container sets host to 0.0.0.0"
-        run_ramalama --dryrun serve --seed abcd --host 127.0.0.1 ${model}
-        assert "$output" =~ ".*--host 127.0.0.1" "Outside container overrides host to 127.0.0.1"
+        assert "$output" =~ ".*--host 127.0.0.1" "Outside container sets host to 127.0.0.1"
+        run_ramalama --dryrun serve --seed abcd --host 127.0.0.2 ${model}
+        assert "$output" =~ ".*--host 127.0.0.2" "Outside container overrides host to 127.0.0.2"
         assert "$output" =~ ".*--seed abcd" "Verify seed is set"
         run_ramalama 1 --nocontainer serve --name foobar tiny
         is "${lines[0]}" "Error: --nocontainer and --name options conflict. The --name option requires a container." "conflict between nocontainer and --name line"
diff --git a/test/unit/test_config.py b/test/unit/test_config.py
index c8b70222..83be1954 100644
--- a/test/unit/test_config.py
+++ b/test/unit/test_config.py
@@ -63,7 +63,7 @@ def test_load_config_from_env(env, config, expected):
                 "ctx_size": 2048,
                 "pull": "newer",
                 "temp": "0.8",
-                "host": "0.0.0.0",
+                "host": "127.0.0.1",
                 "port": "8080",
             },
         ),
@@ -80,10 +80,30 @@ def test_load_config_from_env(env, config, expected):
                 "ctx_size": 2048,
                 "pull": "newer",
                 "temp": "0.8",
-                "host": "0.0.0.0",
+                "host": "127.0.0.1",
                 "port": "8080",
             },
         ),
+        (
+            {
+                "host": "1.2.3.4",
+                "port": "8081",
+                "temp": "10.0",
+                "pull": "never",
+            },
+            {
+                "nocontainer": False,
+                "carimage": "registry.access.redhat.com/ubi9-micro:latest",
+                "runtime": "llama.cpp",
+                "ngl": -1,
+                "keep_groups": False,
+                "ctx_size": 2048,
+                "pull": "never",
+                "temp": "10.0",
+                "host": "1.2.3.4",
+                "port": "8081",
+            },
+        ),
     ],
 )
 def test_load_config_defaults(config, expected):