From 783ffc821771efca0858d2ec34e97e63ff94e92b Mon Sep 17 00:00:00 2001
From: Daniel J Walsh
Date: Thu, 27 Feb 2025 04:50:43 -1000
Subject: [PATCH] Default ramalama serve to only listen on localhost

Currently RamaLama listens on 0.0.0.0, which means the service is bound
to all network interfaces, including interfaces that are reachable from
outside the host. Moving to 127.0.0.1 means the service is only
available on the local system.

This will only affect llama.cpp running without containers.

Signed-off-by: Daniel J Walsh
---
 docs/ramalama-serve.1.md   |  3 ++-
 docs/ramalama.conf         |  5 +++--
 docs/ramalama.conf.5.md    |  5 +++--
 ramalama/config.py         |  2 +-
 ramalama/model.py          |  3 ++-
 test/system/040-serve.bats |  9 ++++-----
 test/unit/test_config.py   | 24 ++++++++++++++++++++++--
 7 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/docs/ramalama-serve.1.md b/docs/ramalama-serve.1.md
index 099b9bee..82009d2f 100644
--- a/docs/ramalama-serve.1.md
+++ b/docs/ramalama-serve.1.md
@@ -68,7 +68,8 @@ Generate specified configuration format for running the AI Model as a service
 show this help message and exit
 
 #### **--host**="0.0.0.0"
-IP address for llama.cpp to listen on.
+IP address for llama.cpp to listen on. Only supported when llama.cpp is running
+without a container.
 
 #### **--name**, **-n**
 Name of the container to run the Model in.
diff --git a/docs/ramalama.conf b/docs/ramalama.conf
index 072bcedb..2ba93c8e 100644
--- a/docs/ramalama.conf
+++ b/docs/ramalama.conf
@@ -40,9 +40,10 @@
 #
 #image = "quay.io/ramalama/ramalama:latest"
 
-# IP address for llama.cpp to listen on.
+# IP address for llama.cpp to listen on. Only supported when llama.cpp is
+# running without a container.
 #
-#host = "0.0.0.0"
+#host = "127.0.0.1"
 
 # Pass `--group-add keep-groups` to podman, when using podman.
 # In some cases this is needed to access the gpu from a rootless container
diff --git a/docs/ramalama.conf.5.md b/docs/ramalama.conf.5.md
index 96407d3b..37e5d950 100644
--- a/docs/ramalama.conf.5.md
+++ b/docs/ramalama.conf.5.md
@@ -78,9 +78,10 @@ Run RamaLama using the specified container engine.
 Valid options are: Podman and Docker
 This field can be overridden by the RAMALAMA_CONTAINER_ENGINE environment variable.
 
-**host**="0.0.0.0"
+**host**="127.0.0.1"
 
-IP address for llama.cpp to listen on.
+IP address for llama.cpp to listen on. Only supported when llama.cpp is running
+without a container.
**image**="quay.io/ramalama/ramalama:latest" diff --git a/ramalama/config.py b/ramalama/config.py index 09ad22ce..c5a68797 100644 --- a/ramalama/config.py +++ b/ramalama/config.py @@ -70,7 +70,7 @@ def load_config_defaults(config: Dict[str, Any]): config['ctx_size'] = config.get('ctx_size', 2048) config['pull'] = config.get('pull', "newer") config['temp'] = config.get('temp', "0.8") - config['host'] = config.get('host', "0.0.0.0") + config['host'] = config.get('host', "127.0.0.1") config['port'] = config.get('port', "8080") diff --git a/ramalama/model.py b/ramalama/model.py index 2a2b981e..2975126b 100644 --- a/ramalama/model.py +++ b/ramalama/model.py @@ -527,7 +527,8 @@ def handle_runtime(self, args, exec_args, exec_model_path): gpu_args = self.gpu_args(args=args) if gpu_args is not None: exec_args.extend(gpu_args) - exec_args.extend(["--host", args.host]) + if not args.container: + exec_args.extend(["--host", args.host]) return exec_args def generate_container_config(self, model_path, args, exec_args): diff --git a/test/system/040-serve.bats b/test/system/040-serve.bats index aadaf5bc..17931ca8 100755 --- a/test/system/040-serve.bats +++ b/test/system/040-serve.bats @@ -17,12 +17,11 @@ verify_begin=".*run --rm -i --label ai.ramalama --name" run_ramalama --dryrun serve --name foobar ${model} is "$output" "${verify_begin} foobar .*" "dryrun correct with --name" assert "$output" !~ ".*--network" "--network is not part of the output" - assert "$output" =~ ".*--host 0.0.0.0" "verify host 0.0.0.0 is added when run within container" is "$output" ".*${model}" "verify model name" + assert "$output" !~ ".*--host" "verify --host not added when run within container" assert "$output" !~ ".*--seed" "assert seed does not show by default" run_ramalama --dryrun serve --network bridge --host 127.1.2.3 --name foobar ${model} - assert "$output" =~ "--network bridge.*--host 127.1.2.3" "verify --host is modified when run within container" is "$output" ".*${model}" "verify model name" is "$output" ".*--temp 0.8" "verify temp is set" @@ -53,9 +52,9 @@ verify_begin=".*run --rm -i --label ai.ramalama --name" run_ramalama stop --all else run_ramalama --dryrun serve ${model} - assert "$output" =~ ".*--host 0.0.0.0" "Outside container sets host to 0.0.0.0" - run_ramalama --dryrun serve --seed abcd --host 127.0.0.1 ${model} - assert "$output" =~ ".*--host 127.0.0.1" "Outside container overrides host to 127.0.0.1" + assert "$output" =~ ".*--host 127.0.0.1" "Outside container sets host to 127.0.0.1" + run_ramalama --dryrun serve --seed abcd --host 127.0.0.2 ${model} + assert "$output" =~ ".*--host 127.0.0.2" "Outside container overrides host to 127.0.0.2" assert "$output" =~ ".*--seed abcd" "Verify seed is set" run_ramalama 1 --nocontainer serve --name foobar tiny is "${lines[0]}" "Error: --nocontainer and --name options conflict. The --name option requires a container." 
"conflict between nocontainer and --name line" diff --git a/test/unit/test_config.py b/test/unit/test_config.py index c8b70222..83be1954 100644 --- a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -63,7 +63,7 @@ def test_load_config_from_env(env, config, expected): "ctx_size": 2048, "pull": "newer", "temp": "0.8", - "host": "0.0.0.0", + "host": "127.0.0.1", "port": "8080", }, ), @@ -80,10 +80,30 @@ def test_load_config_from_env(env, config, expected): "ctx_size": 2048, "pull": "newer", "temp": "0.8", - "host": "0.0.0.0", + "host": "127.0.0.1", "port": "8080", }, ), + ( + { + "host": "1.2.3.4", + "port": "8081", + "temp": "10.0", + "pull": "never", + }, + { + "nocontainer": False, + "carimage": "registry.access.redhat.com/ubi9-micro:latest", + "runtime": "llama.cpp", + "ngl": -1, + "keep_groups": False, + "ctx_size": 2048, + "pull": "never", + "temp": "10.0", + "host": "1.2.3.4", + "port": "8081", + }, + ), ], ) def test_load_config_defaults(config, expected):