Default ramalama serve to only listen on localhost
Currently RamaLama listens on 0.0.0.0, which means the service accepts
connections on every network interface, including interfaces that are
reachable from outside the host. Moving to 127.0.0.1 means the service is
only available on the local system.

This will only affect llama.cpp running without containers.

Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
rhatdan committed Feb 27, 2025
1 parent dc25be9 commit 783ffc8
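For readers unfamiliar with the distinction, here is a generic Python illustration (not RamaLama code) of what the two bind addresses mean; the helper name and port are made up for the example:

    import socket

    def bind_example(host: str, port: int = 8080) -> socket.socket:
        # "0.0.0.0" accepts connections arriving on any network interface of
        # the host; "127.0.0.1" (the new default) accepts loopback traffic only.
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind((host, port))
        s.listen()
        return s

Users who still want the service reachable from other machines can pass --host 0.0.0.0 to ramalama serve, or set host in ramalama.conf, as shown in the documentation changes below.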
Showing 7 changed files with 37 additions and 14 deletions.
docs/ramalama-serve.1.md (3 changes: 2 additions & 1 deletion)
@@ -68,7 +68,8 @@ Generate specified configuration format for running the AI Model as a service
 show this help message and exit

 #### **--host**="0.0.0.0"
-IP address for llama.cpp to listen on.
+IP address for llama.cpp to listen on. Only supported when llama.cpp is running
+without a container.

 #### **--name**, **-n**
 Name of the container to run the Model in.

docs/ramalama.conf (5 changes: 3 additions & 2 deletions)
@@ -40,9 +40,10 @@
 #
 #image = "quay.io/ramalama/ramalama:latest"

-# IP address for llama.cpp to listen on.
+# IP address for llama.cpp to listen on. Only supported when llama.cpp is
+# running without a container.
 #
-#host = "0.0.0.0"
+#host = "127.0.0.1"

 # Pass `--group-add keep-groups` to podman, when using podman.
 # In some cases this is needed to access the gpu from a rootless container

docs/ramalama.conf.5.md (5 changes: 3 additions & 2 deletions)
@@ -78,9 +78,10 @@ Run RamaLama using the specified container engine.
 Valid options are: Podman and Docker
 This field can be overridden by the RAMALAMA_CONTAINER_ENGINE environment variable.

-**host**="0.0.0.0"
+**host**="127.0.0.1"

-IP address for llama.cpp to listen on.
+IP address for llama.cpp to listen on. Only supported when llama.cpp is running
+without a container.

 **image**="quay.io/ramalama/ramalama:latest"

ramalama/config.py (2 changes: 1 addition & 1 deletion)
@@ -70,7 +70,7 @@ def load_config_defaults(config: Dict[str, Any]):
     config['ctx_size'] = config.get('ctx_size', 2048)
     config['pull'] = config.get('pull', "newer")
     config['temp'] = config.get('temp', "0.8")
-    config['host'] = config.get('host', "0.0.0.0")
+    config['host'] = config.get('host', "127.0.0.1")
     config['port'] = config.get('port', "8080")

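A trimmed-down sketch of how this default behaves (only the two lines relevant to this commit; the rest of load_config_defaults is omitted): a host value already present in the config, for example one read from ramalama.conf, is kept, and 127.0.0.1 is used only when nothing was set.

    from typing import Any, Dict

    def load_config_defaults(config: Dict[str, Any]) -> None:
        # Fill in defaults only for keys the user's configuration did not set.
        config['host'] = config.get('host', "127.0.0.1")  # previously "0.0.0.0"
        config['port'] = config.get('port', "8080")

    defaults: Dict[str, Any] = {}
    load_config_defaults(defaults)
    assert defaults['host'] == "127.0.0.1"   # new loopback default

    user_cfg: Dict[str, Any] = {"host": "0.0.0.0"}
    load_config_defaults(user_cfg)
    assert user_cfg['host'] == "0.0.0.0"     # explicit setting wins
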
ramalama/model.py (3 changes: 2 additions & 1 deletion)
@@ -527,7 +527,8 @@ def handle_runtime(self, args, exec_args, exec_model_path):
         gpu_args = self.gpu_args(args=args)
         if gpu_args is not None:
             exec_args.extend(gpu_args)
-        exec_args.extend(["--host", args.host])
+        if not args.container:
+            exec_args.extend(["--host", args.host])
         return exec_args

     def generate_container_config(self, model_path, args, exec_args):

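A condensed sketch of the guard added above (the function name build_serve_args is hypothetical; args.container is assumed to be True when the serve command runs inside a container): llama.cpp only receives --host when it runs directly on the host, since inside a container the engine's port publishing controls exposure instead.

    def build_serve_args(args, exec_args):
        # Pass --host to llama.cpp only when running without a container.
        if not args.container:
            exec_args.extend(["--host", args.host])  # args.host now defaults to 127.0.0.1
        return exec_args
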
test/system/040-serve.bats (9 changes: 4 additions & 5 deletions)
@@ -17,12 +17,11 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
     run_ramalama --dryrun serve --name foobar ${model}
     is "$output" "${verify_begin} foobar .*" "dryrun correct with --name"
     assert "$output" !~ ".*--network" "--network is not part of the output"
-    assert "$output" =~ ".*--host 0.0.0.0" "verify host 0.0.0.0 is added when run within container"
     is "$output" ".*${model}" "verify model name"
+    assert "$output" !~ ".*--host" "verify --host not added when run within container"
     assert "$output" !~ ".*--seed" "assert seed does not show by default"

     run_ramalama --dryrun serve --network bridge --host 127.1.2.3 --name foobar ${model}
-    assert "$output" =~ "--network bridge.*--host 127.1.2.3" "verify --host is modified when run within container"
     is "$output" ".*${model}" "verify model name"
     is "$output" ".*--temp 0.8" "verify temp is set"

@@ -53,9 +52,9 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
         run_ramalama stop --all
     else
         run_ramalama --dryrun serve ${model}
-        assert "$output" =~ ".*--host 0.0.0.0" "Outside container sets host to 0.0.0.0"
-        run_ramalama --dryrun serve --seed abcd --host 127.0.0.1 ${model}
-        assert "$output" =~ ".*--host 127.0.0.1" "Outside container overrides host to 127.0.0.1"
+        assert "$output" =~ ".*--host 127.0.0.1" "Outside container sets host to 127.0.0.1"
+        run_ramalama --dryrun serve --seed abcd --host 127.0.0.2 ${model}
+        assert "$output" =~ ".*--host 127.0.0.2" "Outside container overrides host to 127.0.0.2"
         assert "$output" =~ ".*--seed abcd" "Verify seed is set"
         run_ramalama 1 --nocontainer serve --name foobar tiny
         is "${lines[0]}" "Error: --nocontainer and --name options conflict. The --name option requires a container." "conflict between nocontainer and --name line"

test/unit/test_config.py (24 changes: 22 additions & 2 deletions)
@@ -63,7 +63,7 @@ def test_load_config_from_env(env, config, expected):
                 "ctx_size": 2048,
                 "pull": "newer",
                 "temp": "0.8",
-                "host": "0.0.0.0",
+                "host": "127.0.0.1",
                 "port": "8080",
             },
         ),
@@ -80,10 +80,30 @@ def test_load_config_from_env(env, config, expected):
                 "ctx_size": 2048,
                 "pull": "newer",
                 "temp": "0.8",
-                "host": "0.0.0.0",
+                "host": "127.0.0.1",
                 "port": "8080",
             },
         ),
+        (
+            {
+                "host": "1.2.3.4",
+                "port": "8081",
+                "temp": "10.0",
+                "pull": "never",
+            },
+            {
+                "nocontainer": False,
+                "carimage": "registry.access.redhat.com/ubi9-micro:latest",
+                "runtime": "llama.cpp",
+                "ngl": -1,
+                "keep_groups": False,
+                "ctx_size": 2048,
+                "pull": "never",
+                "temp": "10.0",
+                "host": "1.2.3.4",
+                "port": "8081",
+            },
+        ),
     ],
 )
 def test_load_config_defaults(config, expected):
