
Commit afaee00

mikaylagawarecki authored and pytorchmergebot committed
Add python nested_tensor and as_nested_tensor constructors in torch.nested (pytorch#85593)
Remove `torch.nested_tensor`, which has erroneous behavior with respect to gradients (the result could be either a leaf or not a leaf). Introduce `torch.nested.nested_tensor` and `torch.nested.as_nested_tensor` in the vein of `torch.tensor` and `torch.as_tensor`. This is done in nested `__init__.py` for now but can move to pybind in the future (when we want to load from numpy/nested lists). Discussed offline with @cpuhrsch: the pybind constructor (pytorch#85536) was more gnarly than expected, so we can move to that when we actually need loading from numpy etc.

Differential Revision: [D39806622](https://our.internmc.facebook.com/intern/diff/D39806622)

Pull Request resolved: pytorch#85593
Approved by: https://github.com/drisspg, https://github.com/cpuhrsch
1 parent a876432 · commit afaee00
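A minimal sketch of the constructor semantics described in the commit message above, assuming only what it states (`torch.nested.nested_tensor` behaves like `torch.tensor` and can be a leaf; `torch.nested.as_nested_tensor` behaves like `torch.as_tensor` and preserves the autograd history of its inputs):

    import torch

    a = torch.randn(2, 3, requires_grad=True)
    b = torch.randn(4, 3, requires_grad=True)

    # Copying constructor: with requires_grad=True the result is a leaf,
    # analogous to torch.tensor.
    nt = torch.nested.nested_tensor([a, b], requires_grad=True)
    assert nt.is_leaf

    # Graph-preserving constructor: gradients flow back to a and b,
    # analogous to torch.as_tensor, so the result is not a leaf here.
    nt2 = torch.nested.as_nested_tensor([a, b])
    assert not nt2.is_leaf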

File tree

17 files changed (+381 -225)


aten/src/ATen/native/native_functions.yaml (+3 -3)

@@ -12716,11 +12716,11 @@
   variants: function
   python_module: nn

-- func: nested_tensor(Tensor[] list, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+- func: _nested_tensor_from_tensor_list(Tensor[] list, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   variants: function
   dispatch:
-    CompositeExplicitAutograd: nested_tensor
-  autogen: nested_tensor.out
+    CompositeExplicitAutograd: _nested_tensor_from_tensor_list
+  autogen: _nested_tensor_from_tensor_list.out

 - func: _fw_primal_copy(Tensor self, int level) -> Tensor
   variants: function
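The rename only touches the private ATen op; the public entry point is now `torch.nested.nested_tensor`. A quick way to see the relationship, assuming the standard codegen exposure of `native_functions.yaml` entries under `torch.ops.aten`:

    import torch

    ts = [torch.randn(2, 3), torch.randn(4, 3)]

    # Public constructor introduced by this PR.
    nt = torch.nested.nested_tensor(ts)

    # The private op that replaces aten::nested_tensor (assumption:
    # reachable via torch.ops.aten, as usual for autogenerated bindings).
    nt2 = torch.ops.aten._nested_tensor_from_tensor_list(ts)
    assert nt.is_nested and nt2.is_nested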

aten/src/ATen/native/nested/NestedTensorMath.cpp (+1 -1)

@@ -161,7 +161,7 @@ bool NestedTensor_nested_tensor_from_mask_left_aligned(const Tensor& t, const Te
   return sizes.equal(nums);
 }

-Tensor nested_tensor(
+Tensor _nested_tensor_from_tensor_list(
     TensorList list,
     c10::optional<ScalarType> dtype,
     c10::optional<Layout> layout,
docs/source/nested.rst (+9 -7)

@@ -23,7 +23,7 @@ Construction is straightforward and involves passing a list of Tensors to the co
 tensor([0, 1, 2])
 >>> b
 tensor([3, 4, 5, 6, 7])
->>> nt = torch.nested_tensor([a, b])
+>>> nt = torch.nested.nested_tensor([a, b])
 >>> nt
 nested_tensor([
   tensor([0, 1, 2]),
@@ -32,7 +32,7 @@

 Data type and device can be chosen via the usual keyword arguments.

->>> nt = torch.nested_tensor([a, b], dtype=torch.float32, device="cuda")
+>>> nt = torch.nested.nested_tensor([a, b], dtype=torch.float32, device="cuda")
 >>> nt
 nested_tensor([
   tensor([0., 1., 2.], device='cuda:0'),
@@ -43,15 +43,15 @@ In order to form a valid NestedTensor the passed Tensors also all need to match

 >>> a = torch.randn(3, 50, 70) # image 1
 >>> b = torch.randn(3, 128, 64) # image 2
->>> nt = torch.nested_tensor([a, b], dtype=torch.float32)
+>>> nt = torch.nested.nested_tensor([a, b], dtype=torch.float32)
 >>> nt.dim()
 4

 If one of the dimensions don't match, the constructor throws an error.

 >>> a = torch.randn(50, 128) # text 1
 >>> b = torch.randn(3, 128, 64) # image 2
->>> nt = torch.nested_tensor([a, b], dtype=torch.float32)
+>>> nt = torch.nested.nested_tensor([a, b], dtype=torch.float32)
 Traceback (most recent call last):
   File "<stdin>", line 1, in <module>
 RuntimeError: All Tensors given to nested_tensor must have the same dimension. Found dimension 3 for Tensor at index 1 and dimension 2 for Tensor at index 0.
@@ -73,7 +73,7 @@ Even though a NestedTensor does not support .size() (or .shape), it supports .si

 >>> a = torch.randn(50, 128) # text 1
 >>> b = torch.randn(32, 128) # text 2
->>> nt = torch.nested_tensor([a, b], dtype=torch.float32)
+>>> nt = torch.nested.nested_tensor([a, b], dtype=torch.float32)
 >>> nt.size(0)
 2
 >>> nt.size(1)
@@ -86,7 +86,7 @@ RuntimeError: Given dimension 1 is irregular and does not have a size.
 If all dimensions are regular, the NestedTensor is intended to be semantically indistinguishable from a regular torch.Tensor.

 >>> a = torch.randn(20, 128) # text 1
->>> nt = torch.nested_tensor([a, a], dtype=torch.float32)
+>>> nt = torch.nested.nested_tensor([a, a], dtype=torch.float32)
 >>> nt.size(0)
 2
 >>> nt.size(1)
@@ -112,7 +112,7 @@ unbind allows you to retrieve a view of the constituents.
 >>> import torch
 >>> a = torch.randn(2, 3)
 >>> b = torch.randn(3, 4)
->>> nt = torch.nested_tensor([a, b], dtype=torch.float32)
+>>> nt = torch.nested.nested_tensor([a, b], dtype=torch.float32)
 >>> nt
 nested_tensor([
   tensor([[ 1.2286, -1.2343, -1.4842],
@@ -149,4 +149,6 @@ The following functions are related to nested tensors:

 .. currentmodule:: torch.nested

+.. autofunction:: nested_tensor
+.. autofunction:: as_nested_tensor
 .. autofunction:: to_padded_tensor
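An end-to-end sketch of the documented flow, using only functions named in this file (construct a nested tensor, then materialize a padded dense tensor):

    import torch

    a = torch.randn(50, 128)  # text 1
    b = torch.randn(32, 128)  # text 2
    nt = torch.nested.nested_tensor([a, b], dtype=torch.float32)

    # to_padded_tensor pads the shorter constituent with the given value
    # and returns a regular torch.Tensor.
    padded = torch.nested.to_padded_tensor(nt, 0.0)
    assert padded.shape == (2, 50, 128)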

test/cpp/api/nested.cpp (+1 -1)

@@ -10,6 +10,6 @@
 TEST(NestedTest, Nested) {
   auto a = torch::randn({2, 3});
   auto b = torch::randn({4, 5});
-  auto nt = torch::nested_tensor({a, b});
+  auto nt = torch::nested::nested_tensor({a, b});
   torch::nested::to_padded_tensor(nt, 0);
 }

test/forward_backward_compatibility/check_forward_backward_compatibility.py (+2 -0)

@@ -284,6 +284,8 @@
     ("c10d::allgather_", datetime.date(2022, 10, 1)),
     ("aten::to_padded_tensor", datetime.date(2022, 10, 1)),
     ("aten::nested_to_padded_tensor", datetime.date(2022, 10, 1)),
+    ("aten::nested_tensor", datetime.date(2022, 10, 15)),
+
 ]

 ALLOW_LIST_COMPILED = [

test/profiler/test_profiler.py (+1 -1)

@@ -1248,7 +1248,7 @@ def test_nested_tensor_with_shapes(self):
         a = torch.randn(4, 4)
         b = torch.randn(4, 4)
         c = torch.randn(4, 4)
-        inp = torch.nested_tensor([a, b])
+        inp = torch.nested.nested_tensor([a, b])
         with torch.profiler.profile(record_shapes=True) as prof:
             torch.nn.functional.linear(inp, c, None)
         for e in prof.events():
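Condensed, the behavior this test checks: shapes of nested-tensor inputs are recorded by the profiler when `record_shapes=True` (tensor values are illustrative):

    import torch

    inp = torch.nested.nested_tensor([torch.randn(4, 4), torch.randn(4, 4)])
    weight = torch.randn(4, 4)
    with torch.profiler.profile(record_shapes=True) as prof:
        torch.nn.functional.linear(inp, weight, None)

    # Each profiler event carries the input shapes it observed.
    for e in prof.events():
        if e.name == "aten::linear":
            print(e.input_shapes)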

test/test_autograd.py (+8 -8)

@@ -3569,12 +3569,12 @@ def test_calculate_shape_util(self):
         assert out_shape == torch.Size([10, 5])
         assert grad_shape == torch.Size([5, 10])

-        out = torch.nested_tensor([
+        out = torch.nested.as_nested_tensor([
             torch.randn(10, 5, requires_grad=True),
             torch.randn(10, 5, requires_grad=True),
             torch.randn(10, 5, requires_grad=True)]
         )
-        grad = torch.nested_tensor([torch.randn(5, 10, requires_grad=True), torch.randn(5, 10, requires_grad=True)])
+        grad = torch.nested.as_nested_tensor([torch.randn(5, 10, requires_grad=True), torch.randn(5, 10, requires_grad=True)])
         out_shape, grad_shape = _calculate_shape(out, grad, False)

         assert torch.equal(out_shape, torch.tensor([[10, 5], [10, 5], [10, 5]]))
@@ -9178,12 +9178,12 @@ def test_autograd_multiple_dispatch_registrations(self, device):
         # test registered AutogradNestedTensor formula
         a = torch.arange(6, dtype=torch.float, device=device).reshape(2, 3).requires_grad_(True)
         b = torch.arange(8, dtype=torch.float, device=device).reshape(2, 4).requires_grad_(True)
-        nt = torch.nested_tensor([a, b], dtype=torch.float, device=device)
+        nt = torch.nested.as_nested_tensor([a, b], dtype=torch.float, device=device)

         nt_out = torch._test_autograd_multiple_dispatch(nt)
         c = torch.randn(2, 3, device=device)
         d = torch.randn(2, 4, device=device)
-        nt_grad = torch.nested_tensor([c, d], dtype=torch.float, device=device)
+        nt_grad = torch.nested.nested_tensor([c, d], dtype=torch.float, device=device)
         nt_out.backward(nt_grad)

         # bogus gradient for AutogradNestedTensor is grad * grad
@@ -9204,12 +9204,12 @@ def test_autograd_composite_implicit_and_dispatch_registration(self, device):
         # test registered AutogradNestedTensor formula
         a = torch.arange(6, dtype=torch.float, device=device).reshape(2, 3).requires_grad_(True)
         b = torch.arange(8, dtype=torch.float, device=device).reshape(2, 4).requires_grad_(True)
-        nt = torch.nested_tensor([a, b], dtype=torch.float, device=device)
+        nt = torch.nested.as_nested_tensor([a, b], dtype=torch.float, device=device)

         nt_out = torch._test_autograd_multiple_dispatch(nt, True)
         c = torch.randn(2, 3, device=device)
         d = torch.randn(2, 4, device=device)
-        nt_grad = torch.nested_tensor([c, d], dtype=torch.float, device=device)
+        nt_grad = torch.nested.nested_tensor([c, d], dtype=torch.float, device=device)
         nt_out.backward(nt_grad)

         # bogus gradient for AutogradNestedTensor is grad * grad + grad
@@ -9274,9 +9274,9 @@ def foo(x):
         foo(inp).backward()

         # sum's input is saved for Nested Tensors
-        nt = torch.nested_tensor([torch.rand(2), torch.rand(2)], device=device).requires_grad_()
+        nt = torch.nested.nested_tensor([torch.rand(2), torch.rand(2)], device=device, requires_grad=True)
         with self.assertRaisesRegex(RuntimeError, "modified by an inplace operation"):
-            foo(nt).backward(torch.nested_tensor([torch.rand(1), torch.rand(1)], device=device))
+            foo(nt).backward(torch.nested.nested_tensor([torch.rand(1), torch.rand(1)], device=device))

         # Import test cases from below autograd/ here. These are found
         # implicitly by the loader, so Flake8 thinks they are unused, hence
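These hunks pick the constructor to match intent: `as_nested_tensor` where gradients must reach the constituent tensors, `nested_tensor` where a detached value such as an incoming gradient is wanted. A minimal sketch of the first case, assuming backward accepts a nested gradient as in the tests above:

    import torch

    a = torch.randn(2, 3, requires_grad=True)
    b = torch.randn(2, 4, requires_grad=True)

    # as_nested_tensor keeps a and b in the autograd graph, so gradients
    # propagate back to the constituents.
    nt = torch.nested.as_nested_tensor([a, b])
    grad = torch.nested.nested_tensor([torch.ones(2, 3), torch.ones(2, 4)])
    nt.backward(grad)
    assert torch.equal(a.grad, torch.ones(2, 3))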

test/test_native_mha.py (+5 -5)

@@ -39,7 +39,7 @@ def _test_transform_bias_rescale_qkv_impl(
         xs = list(torch.unbind(x))
         if use_padding:
             xs[0] = xs[0][:-1]
-        x = torch.nested_tensor(xs, device=device, dtype=dtype)
+        x = torch.nested.nested_tensor(xs, device=device, dtype=dtype)
         qkv = torch.nn.Linear(embed_dim, 3 * embed_dim, device=device, dtype=dtype)

         # We have to use inference_mode here because q/k/v are
@@ -199,15 +199,15 @@ def forward(self, q, k, v, key_padding_mask):
             qs = [x[:-1] for x in qs]
         else:
             qs[0] = qs[0][:-1]
-        q = torch.nested_tensor(qs, device=device, dtype=dtype)
+        q = torch.nested.nested_tensor(qs, device=device, dtype=dtype)
         if mode == "self":
             k = v = q
         elif mode == "encdec":
-            k = torch.nested_tensor(torch.unbind(k), device=device, dtype=dtype)
+            k = torch.nested.nested_tensor(torch.unbind(k), device=device, dtype=dtype)
             v = k
         else:
-            k = torch.nested_tensor(torch.unbind(k), device=device, dtype=dtype)
-            v = torch.nested_tensor(torch.unbind(v), device=device, dtype=dtype)
+            k = torch.nested.nested_tensor(torch.unbind(k), device=device, dtype=dtype)
+            v = torch.nested.nested_tensor(torch.unbind(v), device=device, dtype=dtype)

         ynpt, weight_npt = npt(
             q, k, v, key_padding_mask=mask if use_padding and not use_nt else None
