Skip to content

Commit

Permalink
ready for numpy 2.0
Browse files: browse the repository at this point in the history
  • Loading branch information
fgregg committed Jun 21, 2024
1 parent 85fc7e7 commit 139291b
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 10 deletions.
12 changes: 6 additions & 6 deletions dedupe/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@
BlocksStr = Iterator[BlockStr]
Blocks = Union[BlocksInt, BlocksStr]
ClusterInt = Tuple[
Tuple[int, ...], Union[numpy.typing.NDArray[numpy.float_], Tuple[float, ...]]
Tuple[int, ...], Union[numpy.typing.NDArray[numpy.float64], Tuple[float, ...]]
]
ClusterStr = Tuple[
Tuple[str, ...], Union[numpy.typing.NDArray[numpy.float_], Tuple[float, ...]]
Tuple[str, ...], Union[numpy.typing.NDArray[numpy.float64], Tuple[float, ...]]
]
ClustersInt = Iterable[ClusterInt]
ClustersStr = Iterable[ClusterStr]
Expand Down Expand Up @@ -107,18 +107,18 @@ class TrainingData(TypedDict):

# Takes pairs of records and generates a (n_samples X n_features) array
FeaturizerFunction = Callable[
[Sequence[RecordDictPair]], numpy.typing.NDArray[numpy.float_]
[Sequence[RecordDictPair]], numpy.typing.NDArray[numpy.float64]
]


class Classifier(Protocol):
"""Takes an array of pairwise distances and computes the likelihood they are a pair."""

def fit(self, X: numpy.typing.NDArray[numpy.float_], y: LabelsLike) -> None: ...
def fit(self, X: numpy.typing.NDArray[numpy.float64], y: LabelsLike) -> None: ...

def predict_proba(
self, X: numpy.typing.NDArray[numpy.float_]
) -> numpy.typing.NDArray[numpy.float_]: ...
self, X: numpy.typing.NDArray[numpy.float64]
) -> numpy.typing.NDArray[numpy.float64]: ...


class ClosableJoinable(Protocol):
Expand Down
6 changes: 3 additions & 3 deletions dedupe/convenience.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,18 @@ def randomPairs(n_records: int, sample_size: int) -> IndicesIterator:
else:
try:
random_pairs = numpy.array(
random.sample(range(n), sample_size), dtype=numpy.uint
random.sample(range(n), sample_size), dtype=numpy.uint64
)
except OverflowError:
return randomPairsWithReplacement(n_records, sample_size)

b: int = 1 - 2 * n_records

i = (-b - 2 * numpy.sqrt(2 * (n - random_pairs) + 0.25)) // 2
i = i.astype(numpy.uint)
i = i.astype(numpy.int64)

j = random_pairs + i * (b + i + 2) // 2 + 1
j = j.astype(numpy.uint)
j = j.astype(numpy.uint64)

return zip(i, j)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dependencies = [
"scikit-learn",
"affinegap>=1.3",
"categorical-distance>=1.9",
"numpy>=1.20,<2.0",
"numpy>=1.20",
"doublemetaphone",
"highered>=0.2.0",
"simplecosine>=1.2",
Expand Down

0 comments on commit 139291b

Please sign in to comment.