Default ramalama serve to only listen on localhost
Currently RamaLama listens on 0.0.0.0, which means the service accepts
connections on every network interface, including interfaces that are
reachable from outside the host. Moving to 127.0.0.1 means the service is
only available on the local system.

This will only affect llama.cpp running without containers.

Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
rhatdan committed Feb 27, 2025
1 parent dc25be9 commit 783ffc8
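For readers unfamiliar with the distinction, here is a generic Python illustration (not RamaLama code) of what the two bind addresses mean; the helper name and port are made up for the example:

    import socket

    def bind_example(host: str, port: int = 8080) -> socket.socket:
        # "0.0.0.0" accepts connections arriving on any network interface of
        # the host; "127.0.0.1" (the new default) accepts loopback traffic only.
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind((host, port))
        s.listen()
        return s

Users who still want the service reachable from other machines can pass --host 0.0.0.0 to ramalama serve, or set host in ramalama.conf, as shown in the documentation changes below.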
Showing 7 changed files with 37 additions and 14 deletions.
docs/ramalama-serve.1.md (3 changes: 2 additions & 1 deletion)
@@ -68,7 +68,8 @@ Generate specified configuration format for running the AI Model as a service
 show this help message and exit

 #### **--host**="0.0.0.0"
-IP address for llama.cpp to listen on.
+IP address for llama.cpp to listen on. Only supported when llama.cpp is running
+without a container.

 #### **--name**, **-n**
 Name of the container to run the Model in.

docs/ramalama.conf (5 changes: 3 additions & 2 deletions)
@@ -40,9 +40,10 @@
 #
 #image = "quay.io/ramalama/ramalama:latest"

-# IP address for llama.cpp to listen on.
+# IP address for llama.cpp to listen on. Only supported when llama.cpp is
+# running without a container.
 #
-#host = "0.0.0.0"
+#host = "127.0.0.1"

 # Pass `--group-add keep-groups` to podman, when using podman.
 # In some cases this is needed to access the gpu from a rootless container

docs/ramalama.conf.5.md (5 changes: 3 additions & 2 deletions)
@@ -78,9 +78,10 @@ Run RamaLama using the specified container engine.
 Valid options are: Podman and Docker
 This field can be overridden by the RAMALAMA_CONTAINER_ENGINE environment variable.

-**host**="0.0.0.0"
+**host**="127.0.0.1"

-IP address for llama.cpp to listen on.
+IP address for llama.cpp to listen on. Only supported when llama.cpp is running
+without a container.

 **image**="quay.io/ramalama/ramalama:latest"

ramalama/config.py (2 changes: 1 addition & 1 deletion)
@@ -70,7 +70,7 @@ def load_config_defaults(config: Dict[str, Any]):
     config['ctx_size'] = config.get('ctx_size', 2048)
     config['pull'] = config.get('pull', "newer")
     config['temp'] = config.get('temp', "0.8")
-    config['host'] = config.get('host', "0.0.0.0")
+    config['host'] = config.get('host', "127.0.0.1")
     config['port'] = config.get('port', "8080")

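A trimmed-down sketch of how this default behaves (only the two lines relevant to this commit; the rest of load_config_defaults is omitted): a host value already present in the config, for example one read from ramalama.conf, is kept, and 127.0.0.1 is used only when nothing was set.

    from typing import Any, Dict

    def load_config_defaults(config: Dict[str, Any]) -> None:
        # Fill in defaults only for keys the user's configuration did not set.
        config['host'] = config.get('host', "127.0.0.1")  # previously "0.0.0.0"
        config['port'] = config.get('port', "8080")

    defaults: Dict[str, Any] = {}
    load_config_defaults(defaults)
    assert defaults['host'] == "127.0.0.1"   # new loopback default

    user_cfg: Dict[str, Any] = {"host": "0.0.0.0"}
    load_config_defaults(user_cfg)
    assert user_cfg['host'] == "0.0.0.0"     # explicit setting wins
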
ramalama/model.py (3 changes: 2 additions & 1 deletion)
@@ -527,7 +527,8 @@ def handle_runtime(self, args, exec_args, exec_model_path):
         gpu_args = self.gpu_args(args=args)
         if gpu_args is not None:
             exec_args.extend(gpu_args)
-        exec_args.extend(["--host", args.host])
+        if not args.container:
+            exec_args.extend(["--host", args.host])
         return exec_args

     def generate_container_config(self, model_path, args, exec_args):

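A condensed sketch of the guard added above (the function name build_serve_args is hypothetical; args.container is assumed to be True when the serve command runs inside a container): llama.cpp only receives --host when it runs directly on the host, since inside a container the engine's port publishing controls exposure instead.

    def build_serve_args(args, exec_args):
        # Pass --host to llama.cpp only when running without a container.
        if not args.container:
            exec_args.extend(["--host", args.host])  # args.host now defaults to 127.0.0.1
        return exec_args
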
test/system/040-serve.bats (9 changes: 4 additions & 5 deletions)
@@ -17,12 +17,11 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
     run_ramalama --dryrun serve --name foobar ${model}
     is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
     assert "$output" !~ ".*--network" "--network is not part of the output"
-    assert "$output" =~ ".*--host 0.0.0.0" "verify host 0.0.0.0 is added when run within container"
     is "$output" ".*${model}" "verify model name"
+    assert "$output" !~ ".*--host" "verify --host not added when run within container"
     assert "$output" !~ ".*--seed" "assert seed does not show by default"

     run_ramalama --dryrun serve --network bridge --host 127.1.2.3 --name foobar ${model}
-    assert "$output" =~ "--network bridge.*--host 127.1.2.3" "verify --host is modified when run within container"
     is "$output" ".*${model}" "verify model name"
     is "$output" ".*--temp 0.8" "verify temp is set"

@@ -53,9 +52,9 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
         run_ramalama stop --all
     else
         run_ramalama --dryrun serve ${model}
-        assert "$output" =~ ".*--host 0.0.0.0" "Outside container sets host to 0.0.0.0"
-        run_ramalama --dryrun serve --seed abcd --host 127.0.0.1 ${model}
-        assert "$output" =~ ".*--host 127.0.0.1" "Outside container overrides host to 127.0.0.1"
+        assert "$output" =~ ".*--host 127.0.0.1" "Outside container sets host to 127.0.0.1"
+        run_ramalama --dryrun serve --seed abcd --host 127.0.0.2 ${model}
+        assert "$output" =~ ".*--host 127.0.0.2" "Outside container overrides host to 127.0.0.2"
         assert "$output" =~ ".*--seed abcd" "Verify seed is set"
         run_ramalama 1 --nocontainer serve --name foobar tiny
         is "${lines[0]}" "Error: --nocontainer and --name options conflict. The --name option requires a container." "conflict between nocontainer and --name line"

test/unit/test_config.py (24 changes: 22 additions & 2 deletions)
@@ -63,7 +63,7 @@ def test_load_config_from_env(env, config, expected):
                 "ctx_size": 2048,
                 "pull": "newer",
                 "temp": "0.8",
-                "host": "0.0.0.0",
+                "host": "127.0.0.1",
                 "port": "8080",
             },
         ),
@@ -80,10 +80,30 @@ def test_load_config_from_env(env, config, expected):
                 "ctx_size": 2048,
                 "pull": "newer",
                 "temp": "0.8",
-                "host": "0.0.0.0",
+                "host": "127.0.0.1",
                 "port": "8080",
             },
         ),
+        (
+            {
+                "host": "1.2.3.4",
+                "port": "8081",
+                "temp": "10.0",
+                "pull": "never",
+            },
+            {
+                "nocontainer": False,
+                "carimage": "registry.access.redhat.com/ubi9-micro:latest",
+                "runtime": "llama.cpp",
+                "ngl": -1,
+                "keep_groups": False,
+                "ctx_size": 2048,
+                "pull": "never",
+                "temp": "10.0",
+                "host": "1.2.3.4",
+                "port": "8081",
+            },
+        ),
     ],
 )
 def test_load_config_defaults(config, expected):
