Skip to content

Commit

Permalink
Merge commit from fork
Browse files Browse the repository at this point in the history
This change:
 * Generalize file access checks.
 * When `--allow-local-file-access` flag is set, only allow access to files
 within src dir and below.
 * Always allow access to `templates_dir` for XML entity includes.

Fixes GHSA-432c-wxpg-m4q3
  • Loading branch information
kesara authored Feb 6, 2025
1 parent a46fad0 commit 386cf09
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 25 deletions.
42 changes: 42 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from xml2rfc.writers.base import default_options, BaseV3Writer, RfcWriterError
from xml2rfc.writers import DatatrackerToBibConverter
from xml2rfc.writers.text import MAX_WIDTH
from xml2rfc.util.file import can_access, FileAccessError

try:
from xml2rfc import debug
Expand Down Expand Up @@ -882,5 +883,46 @@ def test_convert(self):
self.assertEqual(xincludes[1].get("href"), f"https://bib.ietf.org/public/rfc/bibxml-ids/{reference_b}")


class FileAccessTest(unittest.TestCase):
def setUp(self):
self.options = copy.deepcopy(default_options)

def test_allow_access_false(self):
self.options.allow_local_file_access = False
with self.assertRaises(FileAccessError) as e:
can_access(self.options, "/foo/bar", "/bar")
self.assertIn("Can not access local file" in e.exception)

def test_has_meta_chars(self):
self.options.allow_local_file_access = True
for char in "[><*[`$|;&(#]":
with self.assertRaises(FileAccessError) as e:
can_access(self.options, "/foo/bar", f"/bar{char}")
self.assertIn("Found disallowed shell meta-characters" in e.exception)

def test_not_within_src_dir(self):
self.options.allow_local_file_access = True
with self.assertRaises(FileAccessError) as e:
can_access(self.options, "/foo/bar", "/etc/passwd")
self.assertIn("Expected a file located beside or below the .xml source" in e.exception)

def test_non_existent_file(self):
self.options.allow_local_file_access = True
with self.assertRaises(FileAccessError) as e:
can_access(self.options, "test.py", "foobar.foobar")
self.assertIn("no such file exists" in e.exception)

def test_allowed_access(self):
self.options.allow_local_file_access = True
self.assertTrue(can_access(self.options, "test.py", "test.py"))

def test_allowed_access_template(self):
self.options.allow_local_file_access = False
self.assertTrue(can_access(self.options,
source="test.py",
path="rfc2629-xhtml.ent",
access_templates=True))


if __name__ == '__main__':
unittest.main()
16 changes: 8 additions & 8 deletions xml2rfc/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import xml2rfc.utils

from xml2rfc.writers import base
from xml2rfc.util.file import can_access, FileAccessError

try:
from urllib.parse import urlparse, urljoin, urlsplit
Expand Down Expand Up @@ -141,14 +142,13 @@ def resolve(self, request, public_id, context):
if not url.netloc or url.scheme == 'file':
if request.startswith("file://"):
request = request[7:]
if not self.options.allow_local_file_access:
path = self.getReferenceRequest(request)
abspath = os.path.dirname(os.path.abspath(path))
if not (abspath == self.options.template_dir or
abspath == self.source_dir):
error = 'Can not access local file: {}'.format(request)
xml2rfc.log.error(error)
raise XmlRfcError(error)
try:
can_access(self.options,
self.source,
self.getReferenceRequest(request),
access_templates=True)
except FileAccessError as error:
raise XmlRfcError(str(error))
try:
path = self.getReferenceRequest(request)
return self.resolve_filename(path, context)
Expand Down
12 changes: 9 additions & 3 deletions xml2rfc/util/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# Copyright The IETF Trust 2017, All Rights Reserved
# Copyright The IETF Trust 2017, All Rights Reserved
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function

from xml2rfc.util import name, num, date, postal
from xml2rfc.util import name, num, date, postal, file

__all__ = ['name', 'num', 'date', 'postal', ]
__all__ = [
"name",
"num",
"date",
"postal",
"file",
]
46 changes: 46 additions & 0 deletions xml2rfc/util/file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright The IETF Trust 2025, All Rights Reserved
# -*- coding: utf-8 -*-
import os
import re


class FileAccessError(Exception):
"""File access error"""

pass


def can_access(options, source, path, access_templates=False):
# templates can be always accessed
template_path = os.path.abspath(os.path.join(options.template_dir, path))
if (
access_templates
and template_path.startswith(options.template_dir)
and os.path.exists(template_path)
):
return True

# user allowed access?
if not options.allow_local_file_access:
raise FileAccessError(
f"Can not access local file: {path}. Use --allow-local-file-access enable access."
)

# does it have shell meta-chars?
shellmeta = re.compile("[><*[`$|;&(#]")
if shellmeta.search(path):
raise FileAccessError(f"Found disallowed shell meta-characters in {path}")

# file is within the source dir?
dir = os.path.abspath(os.path.dirname(source))
path = os.path.abspath(os.path.join(dir, path))
if not path.startswith(dir):
raise FileAccessError(
f"Expected a file located beside or below the .xml source (in {dir}), but found a reference to {path}"
)

# does it exists?
if not os.path.exists(path):
raise FileAccessError(f"Expected a file at '{path}', but no such file exists")

return True
10 changes: 6 additions & 4 deletions xml2rfc/writers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from xml2rfc import strings, log
from xml2rfc.util.date import extract_date, augment_date, format_date, get_expiry_date
from xml2rfc.util.file import can_access, FileAccessError
from xml2rfc.util.name import short_author_ascii_name_parts, full_author_name_expansion, short_author_name_parts
from xml2rfc.util.unicode import is_svg
from xml2rfc.utils import namespaces, find_duplicate_ids, slugify
Expand Down Expand Up @@ -1856,8 +1857,7 @@ def xinclude(self):
# xml:base attribute). The tool may be configurable with a limit on
# the depth of recursion.
try:
if not self.options.allow_local_file_access:
self.check_includes()
self.check_includes()
self.tree.xinclude()
except etree.XIncludeError as e:
self.die(None, "XInclude processing failed: %s" % e)
Expand All @@ -1869,8 +1869,10 @@ def check_includes(self):
for xinclude in xincludes:
href = urlparse(xinclude.get('href'))
if not href.netloc or href.scheme == 'file':
error = 'XInclude processing failed: Can not access local file: {}'.format(xinclude.get('href'))
self.die(None, error)
try:
can_access(self.options, self.xmlrfc.source, href.path)
except FileAccessError as error:
self.die(None, error)

def remove_dtd(self):
#
Expand Down
16 changes: 6 additions & 10 deletions xml2rfc/writers/preptool.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from xml2rfc.scripts import get_scripts
from xml2rfc.uniscripts import is_script
from xml2rfc.util.date import get_expiry_date, format_date, normalize_month
from xml2rfc.util.file import can_access, FileAccessError
from xml2rfc.util.name import full_author_name_expansion
from xml2rfc.util.num import ol_style_formatter
from xml2rfc.util.unicode import (
Expand Down Expand Up @@ -1714,20 +1715,15 @@ def check_src_scheme(self, e, src):
return (scheme, netloc, path, query, fragment)

def check_src_file_path(self, e, scheme, netloc, path, query, fragment):
shellmeta = re.compile("[><*[`$|;&(#]")
try:
can_access(self.options, self.xmlrfc.source, path)
except FileAccessError as err:
self.err(e, err)
return None
#
dir = os.path.abspath(os.path.dirname(self.xmlrfc.source))
path = os.path.abspath(os.path.join(dir, path))
if not path.startswith(dir):
self.err(e, "Expected an <%s> src= file located beside or below the .xml source (in %s), but found a reference to %s" % (e.tag, dir, path))
return None
src = urlunsplit((scheme, '', path, '', ''))
if shellmeta.search(src):
self.err(e, "Found disallowed shell meta-characters in the src='file:...' attribute")
return None
if not os.path.exists(path):
self.err(e, "Expected an <%s> src= file at '%s', but no such file exists" % (e.tag, path, ))
return None
#
e.set('src', src)
return src
Expand Down

0 comments on commit 386cf09

Please sign in to comment.