Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Do Not Merge] Create memory root to avoid server error #88

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions jupyter_fsspec/file_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,11 @@ def initialize_filesystems(self):
sync_fs = fsspec.filesystem(fs_protocol, *args, **kwargs)
fs = AsyncFileSystemWrapper(sync_fs)

# Temporary fix to handle memory filesystems with empty root path
if fs_protocol == "memory":
if not fs.exists(fs_path):
fs.mkdir(fs_path)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might happen on any filesystem - trying to list a directory that doesn't exist will be FileNotFound (if the filesystem has directories at all). So should generalise this to any protocol?

Copy link
Collaborator Author

@RRosio RRosio Mar 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah right, I don't think that this behavior aligns with typical filesystem expectations. Initially, I was considering cases where a user specifies a memory filesystem path in the config, where I might be inclined to expect the memory filesystem root to exist... but once users can add files and such to the memory filesystem, the root would eventually be created, so it's not really a problem.

I'm wondering if it would be best to propagate the FileNotFound to the frontend then, or would it be more appropriate to interpret the query to the root of an empty memory filesystem as an empty list? Since fsspec would normally raise an error, I'm leaning towards doing that for consistency. In terms of the user experience, I wonder if that could be misleading? I guess for that the backend can raise this error and let the frontend handle this case and how it's presented to the user? I would appreciate your thoughts on this!

Copy link
Collaborator Author

@RRosio RRosio Mar 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since I'm leaning toward the first option, I think I may close this.... I'm working on the exception handling separately and I've pulled in the clean up commits from this PR there.


# Store the filesystem instance
new_filesystems[key] = {
"instance": fs,
Expand Down
58 changes: 27 additions & 31 deletions jupyter_fsspec/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,33 +281,31 @@ async def get(self):
is_async = fs_instance.async_impl

try:
if is_async:
isdir = await fs_instance._isdir(item_path)
else:
isdir = fs_instance.isdir(item_path)
isdir = (
await fs_instance._isdir(item_path)
if is_async
else fs_instance.isdir(item_path)
)

if get_request.type == "range":
range_header = self.request.headers.get("Range")
start, end = parse_range(range_header)
if is_async:
result = await fs_instance._cat_ranges(
[item_path], [int(start)], [int(end)]
)
else:
result = fs_instance.cat_ranges(
[item_path], [int(start)], [int(end)]
)

for i in range(len(result)):
if isinstance(result[i], bytes):
result[i] = result[i].decode("utf-8")
response["content"] = result
result = (
await fs_instance._cat_ranges([item_path], [int(start)], [int(end)])
if is_async
else fs_instance.cat_ranges([item_path], [int(start)], [int(end)])
)

response["content"] = [
r.decode("utf-8") if isinstance(r, bytes) else r for r in result
]
self.set_header("Content-Range", f"bytes {start}-{end}")
elif isdir:
if fs_instance.async_impl:
result = await fs_instance._ls(item_path, detail=True)
else:
result = fs_instance.ls(item_path, detail=True)
result = (
await fs_instance._ls(item_path, detail=True)
if is_async
else fs_instance.ls(item_path, detail=True)
)

detail_to_keep = ["name", "type", "size", "ino", "mode"]
filtered_result = [
Expand All @@ -320,16 +318,14 @@ async def get(self):
]
response["content"] = filtered_result
else:
if fs_instance.async_impl:
result = await fs_instance._cat(item_path)
if isinstance(result, bytes):
result = result.decode("utf-8")
response["content"] = result
else:
result = fs_instance.cat(item_path)
if isinstance(result, bytes):
result = result.decode("utf-8")
response["content"] = result
result = (
await fs_instance._cat(item_path)
if is_async
else fs_instance.cat(item_path)
)
response["content"] = (
result.decode("utf-8") if isinstance(result, bytes) else result
)
self.set_status(200)
response["status"] = "success"
response["description"] = f"Retrieved {item_path}."
Expand Down
6 changes: 4 additions & 2 deletions jupyter_fsspec/tests/unit/test_filesystem_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,8 @@ def test_load_populated_config(setup_config_dir, config_file):

mem_fs_instance = mem_instance_info["instance"]
assert mem_fs_instance.ls("/") == [
{"name": "/my_mem_dir", "size": 0, "type": "directory"}
{"name": "/my_mem_dir", "size": 0, "type": "directory"},
{"name": "/mem_dir", "size": 0, "type": "directory"},
]


Expand All @@ -171,7 +172,8 @@ def test_check_reload_config(setup_config_dir, config_file):

mem_fs_instance = mem_instance_info["instance"]
assert mem_fs_instance.ls("/") == [
{"name": "/my_mem_dir", "size": 0, "type": "directory"}
{"name": "/my_mem_dir", "size": 0, "type": "directory"},
{"name": "/mem_dir", "size": 0, "type": "directory"},
]


Expand Down
Loading