@@ -1022,7 +1022,7 @@ Tensor multi_dot_impl(TensorList _tensors, c10::optional<Tensor> _out) {
// If the first and last tensors have shapes (a, b) and (b, c) the
// output has shape (a, c). If either the first or last tensor is 1D
- // a and/or c dimensions will be implicitely size 1 and will be ommited
+ // a and/or c dimensions will be implicitly size 1 and will be omitted
// from the output. e.g. for inputs (a, b) x (b) the output has shape (a,).
at::native::resize_output(out, out_shape);
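
For context, the shape rule this comment describes can be observed directly through the generated ATen wrapper for the op. A minimal sketch; the concrete tensor sizes are just made-up examples:

    #include <ATen/ATen.h>
    #include <iostream>

    int main() {
      auto A = at::randn({2, 3}); // (a, b) with a = 2, b = 3
      auto B = at::randn({3, 4}); // (b, c) with c = 4
      auto v = at::randn({4});    // 1-D last operand

      // (a, b) x (b, c) -> (a, c)
      std::cout << at::linalg_multi_dot({A, B}).sizes() << "\n";    // [2, 4]

      // With a 1-D last tensor the trailing dimension is implicitly 1
      // and is omitted from the output: (a, b) x (b, c) x (c) -> (a,)
      std::cout << at::linalg_multi_dot({A, B, v}).sizes() << "\n"; // [2]
      return 0;
    }
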
@@ -1809,7 +1809,7 @@ static inline void bmm_out_or_baddbmm_(const Tensor& self_or_result_, const Tens
* vs. other threads, leading to undefined behavior.
* Thus it is recommended to not use at::parallel_for where lambdas do
* ops that go through dispatcher.
- * For now we circument this by InferenceMode guard in order to unlock
+ * For now we circumvent this by InferenceMode guard in order to unlock
* performance.
* Longer term we probably want a separate API that explicitly calls out
* the TLS that it propagates.
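
To make the recommendation above concrete, here is a hypothetical helper (not the function from this file) showing the pattern the comment describes: each worker enters c10::InferenceMode before calling ops that go through the dispatcher, since the caller's thread-local dispatch state is not propagated into at::parallel_for worker threads.

    #include <ATen/ATen.h>
    #include <ATen/Parallel.h>
    #include <c10/core/InferenceMode.h>
    #include <vector>

    // Hypothetical helper: compute results[i] = lhs[i] @ rhs[i] in parallel,
    // with a per-thread InferenceMode guard around the dispatcher calls.
    void batched_matmul(std::vector<at::Tensor>& results,
                        const std::vector<at::Tensor>& lhs,
                        const std::vector<at::Tensor>& rhs) {
      at::parallel_for(0, static_cast<int64_t>(lhs.size()), /*grain_size=*/1,
                       [&](int64_t begin, int64_t end) {
        c10::InferenceMode guard;  // per-thread guard, as in the comment above
        for (int64_t i = begin; i < end; ++i) {
          results[i] = at::matmul(lhs[i], rhs[i]);  // goes through the dispatcher
        }
      });
    }
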
@@ -1946,7 +1946,7 @@ static bool should_fold(const Tensor& tensor1, const Tensor& tensor2, bool has_o
// The output gradient g of this operation would have shape [b, m, k]
// The backward wrt. t2 of bmm would be given by t1.mH @ g, which has shape [b, n, k]
// Then, the backward of expand is simply `sum(0)`. As such, we are instantiating a tensor
- // of shape [b, n, k] unnacessarily, which may cause a large memory footprint, and in the
+ // of shape [b, n, k] unnecessarily, which may cause a large memory footprint, and in the
// worst case, an OOM
bool t2_requires_grad = tensor1_larger ? tensor2.requires_grad() : tensor1.requires_grad();
if (t2_requires_grad && !has_out) {
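
As a shape-level illustration of the argument in this comment (a standalone sketch with made-up sizes, not the kernel's own code): with t1 of shape [b, m, n] and a 2-D t2 of shape [n, k] expanded to [b, n, k], the bmm output and its gradient g have shape [b, m, k], and the intermediate t1.mH @ g that bmm's backward would materialize has shape [b, n, k] before the expand backward reduces it with sum(0).

    #include <ATen/ATen.h>
    #include <iostream>

    int main() {
      constexpr int64_t b = 8, m = 4, n = 5, k = 6;
      auto t1 = at::randn({b, m, n});
      auto t2 = at::randn({n, k});

      // Broadcasted batched matmul: expand t2 and run bmm.
      auto t2_expanded = t2.expand({b, n, k});
      auto out = at::bmm(t1, t2_expanded);               // [b, m, k]
      auto g = at::ones_like(out);                       // stand-in for the output gradient

      // What bmm's backward wrt. the expanded t2 would materialize
      // (t1 is real here, so the transpose plays the role of t1.mH):
      auto grad_t2_expanded = at::bmm(t1.transpose(1, 2), g); // [b, n, k], the "unnecessary" tensor
      auto grad_t2 = grad_t2_expanded.sum(0);                 // [n, k], after the expand backward

      std::cout << out.sizes() << " " << grad_t2_expanded.sizes() << " "
                << grad_t2.sizes() << "\n";              // [8, 4, 6] [8, 5, 6] [5, 6]
      return 0;
    }
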
@@ -2602,7 +2602,7 @@ Tensor compute_T18_scale_square(
auto scs = section_values.template data_ptr<int64_t>();
auto pts = &scs[section_numel];

- // We now will do the matrix muplication in a batch, with above example:
+ // We now will do the matrix multiplication in a batch, with above example:
// 1. Multiply all matrices by 0 (`mul_times[0]`) times, then do `slice`
// to get the remain matrices by acc[1:] (`split_counts[0]`),
// 2. Multiply remain matrices by 1 times and slice to acc[2:]
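
The batching scheme this comment sketches can be illustrated in isolation roughly as follows. This is a hypothetical standalone sketch, not the kernel itself: it assumes a 3-D batch of square matrices already sorted by how many squarings each matrix still needs, and it repeatedly squares an ever-shrinking suffix of the batch in place, slicing off the matrices that are finished.

    #include <ATen/ATen.h>
    #include <vector>

    // 'needed[i]' = number of squarings matrix acc[i] still needs, sorted ascending.
    // acc is modified in place and also returned.
    at::Tensor square_in_batches(at::Tensor acc, const std::vector<int64_t>& needed) {
      int64_t done = 0;   // squarings applied so far to the current suffix
      int64_t start = 0;  // index of the first matrix that still needs squaring
      const auto n = static_cast<int64_t>(needed.size());
      while (start < n) {
        // Drop matrices that are already finished ("slice" to acc[start:]).
        while (start < n && needed[start] == done) {
          ++start;
        }
        if (start == n) break;
        // One batched squaring round over the remaining suffix.
        auto remaining = acc.slice(/*dim=*/0, start);
        acc.slice(0, start).copy_(at::bmm(remaining, remaining));
        ++done;
      }
      return acc;
    }

With needed = {0, 1, 1, 3}, matrix 0 is left untouched, matrices 1 and 2 are squared once, and matrix 3 is squared three times, which is the "multiply in a batch, then slice to the still-unfinished suffix" idea the comment describes.
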
@@ -2761,7 +2761,7 @@ Tensor backward_analytic_function_of_a_matrix(
} // end anon namespace

// Computes the matrix exponential for a given batch of squared matrices.
- // The implementaion is based on:
+ // The implementation is based on:
//
// Bader, P.; Blanes, S.; Casas, F.
// Computing the Matrix Exponential with an Optimized Taylor Polynomial Approximation.
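
As a quick illustration of what this entry point computes, a minimal sketch using the generated at::linalg_matrix_exp wrapper; the diagonal test matrix is just a case where the answer is known in closed form:

    #include <ATen/ATen.h>
    #include <iostream>

    int main() {
      // For a diagonal matrix, the matrix exponential is the elementwise
      // exponential of the diagonal, which gives an easy correctness check.
      auto d = at::randn({4});
      auto A = at::diag(d);
      auto expA = at::linalg_matrix_exp(A);
      std::cout << at::allclose(expA, at::diag(at::exp(d))) << "\n";            // 1

      // It also accepts a batch of square matrices.
      std::cout << at::linalg_matrix_exp(at::randn({3, 5, 5})).sizes() << "\n"; // [3, 5, 5]
      return 0;
    }
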
@@ -2812,7 +2812,7 @@ TORCH_IMPL_FUNC(linalg_vector_norm_out)(const Tensor& self, const Scalar& scalar
// Reductions always use `std::abs` to compute the absolute value. In the backward of this
// function, we need to locate the index that was selected as the largest value. To do so
// we do self.abs() == result to locate the index of the largest element.
- // Now, self.abs() may dispatch to a vectorized implementation which gives sliiightly different
+ // Now, self.abs() may dispatch to a vectorized implementation which gives slightly different
// results to the std::abs(std::complex<T>) implementation.
// As such, to be able to compute the correct index in the backward, we need to use self.abs()
// both in the forward and in the backward
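
From the outside, the index-locating trick this comment refers to looks roughly like the following minimal sketch (the ord=inf case, where the reduction picks a single largest element, is the relevant one; the tensor here is just an example):

    #include <ATen/ATen.h>
    #include <iostream>
    #include <limits>

    int main() {
      // The inf-norm is a max reduction over absolute values; its backward
      // needs to know which element attained the maximum.
      auto x = at::randn({5}, at::kComplexFloat);
      auto inf = std::numeric_limits<double>::infinity();
      auto result = at::linalg_vector_norm(x, inf);

      // The mask marks the selected element. Per the comment above, this
      // comparison is only reliable if the forward computed the absolute
      // values through the same self.abs() path as the backward does.
      auto mask = x.abs() == result;
      std::cout << mask << "\n";
      return 0;
    }
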