fix and format by ruff

YoshitakaNaraoka committed Dec 27, 2023
1 parent 7233a75 commit 88f839e
Showing 31 changed files with 106 additions and 189 deletions.
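For context, this is the kind of combined autofix-and-format diff that ruff produces when run over the whole repository. The exact command, configuration, and ruff version behind this commit are not recorded on this page, so the following is only a minimal sketch of a plausible reproduction, assuming ruff is installed:

    # Hedged sketch of a possible reproduction; not taken from the commit itself.
    import subprocess

    subprocess.run(["ruff", "check", "--fix", "."], check=False)  # lint autofixes, e.g. `obj is not X` and lambda-to-def rewrites
    subprocess.run(["ruff", "format", "."], check=False)          # layout changes, e.g. collapsing short multi-line call signatures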
4 changes: 3 additions & 1 deletion build.py
@@ -1,6 +1,9 @@
from __future__ import annotations

import platform
from distutils.command.build_ext import build_ext
from distutils.errors import CCompilerError, DistutilsExecError, DistutilsPlatformError

import setuptools
from setuptools_rust import Binding, RustExtension

@@ -9,7 +12,6 @@
from numpy import get_include
except ImportError:
subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy"])
from numpy import __version__ as numpy_version
from numpy import get_include

try:
16 changes: 11 additions & 5 deletions docs/parse/__main__.py
@@ -3,6 +3,8 @@
the __doc__ of each object and formats it so that MkDocs can process it in turn.
"""
from __future__ import annotations

import argparse
import collections
import doctest
@@ -360,14 +362,16 @@ def print_docstring(obj, file):
# Methods
if inspect.isclass(obj) and doc["Methods"]:
printf(h2("Methods"))
printf_indent = lambda x, **kwargs: printf(f" {x}", **kwargs)

def printf_indent(x, **kwargs):
return printf(f" {x}", **kwargs)

for meth in doc["Methods"]:
base_method_names = {"clone", "mutate"}

if (
issubclass(obj, river.base.Base)
and not obj is river.base.Base
and obj is not river.base.Base
and meth.name in base_method_names
):
continue
@@ -395,8 +399,8 @@ def print_docstring(obj, file):
}

if (
issubclass(obj, (collections.UserList, collections.UserDict))
and not obj is river.base.Ensemble
issubclass(obj, collections.UserList | collections.UserDict)
and obj is not river.base.Ensemble
and meth.name in container_method_names
):
continue
@@ -488,7 +492,9 @@ def print_module(mod, path, overview, depth=0, verbose=False):
print(md_line(mod.__doc__), file=overview)

# Extract all public classes and functions
ispublic = lambda x: x.__name__ in mod.__all__ and not x.__name__.startswith("_")
def ispublic(x):
return x.__name__ in mod.__all__ and not x.__name__.startswith("_")

classes = inspect.getmembers(mod, lambda x: inspect.isclass(x) and ispublic(x))
funcs = inspect.getmembers(mod, lambda x: inspect.isfunction(x) and ispublic(x))

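A side note on the issubclass change above: the collections.UserList | collections.UserDict form relies on PEP 604, so it is only valid at runtime on Python 3.10 or newer, where | between two classes builds a types.UnionType that issubclass accepts. A minimal illustration of that behaviour (my own example, not part of the commit):

    # Illustration only (assumes Python >= 3.10); not taken from the commit.
    import collections

    combined = collections.UserList | collections.UserDict  # a types.UnionType instance
    assert issubclass(collections.UserList, combined)       # UserList is one of the union members
    assert not issubclass(dict, combined)                    # dict is neither UserList nor UserDict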
4 changes: 1 addition & 3 deletions river/active/entropy.py
@@ -63,9 +63,7 @@ class EntropySampler(ActiveLearningClassifier):
"""

def __init__(
self, classifier: base.Classifier, discount_factor: float = 3, seed=None
):
def __init__(self, classifier: base.Classifier, discount_factor: float = 3, seed=None):
super().__init__(classifier, seed=seed)
self.discount_factor = discount_factor

7 changes: 2 additions & 5 deletions river/anomaly/filter.py
@@ -86,9 +86,7 @@ class ThresholdFilter(anomaly.base.AnomalyFilter):
"""

def __init__(
self, anomaly_detector, threshold: float, protect_anomaly_detector=True
):
def __init__(self, anomaly_detector, threshold: float, protect_anomaly_detector=True):
super().__init__(
anomaly_detector=anomaly_detector,
protect_anomaly_detector=protect_anomaly_detector,
@@ -188,7 +186,6 @@ def _unit_test_params(cls):
from river import preprocessing

yield {
"anomaly_detector": preprocessing.StandardScaler()
| anomaly.OneClassSVM(nu=0.2),
"anomaly_detector": preprocessing.StandardScaler() | anomaly.OneClassSVM(nu=0.2),
"q": 0.995,
}
4 changes: 1 addition & 3 deletions river/bandit/base.py
@@ -10,9 +10,7 @@
__all__ = ["ArmID", "Policy", "ContextualPolicy", "RewardObj"]

ArmID = typing.Union[int, str] # noqa: UP007
RewardObj = typing.Union[ # noqa: UP007
stats.base.Statistic, metrics.base.Metric, proba.base.Distribution
]
RewardObj = typing.Union[stats.base.Statistic, metrics.base.Metric, proba.base.Distribution] # noqa: UP007


class Policy(base.Base, abc.ABC):
8 changes: 2 additions & 6 deletions river/bandit/envs/candy_cane.py
@@ -58,12 +58,8 @@ def __init__(self, n_machines=100, reward_decay=0.03):
self.action_space = gym.spaces.Discrete(n_machines)
self.observation_space = gym.spaces.Dict(
{
"attempts": gym.spaces.Tuple(
[gym.spaces.Discrete(self.n_steps)] * n_machines
),
"successes": gym.spaces.Tuple(
[gym.spaces.Discrete(self.n_steps)] * n_machines
),
"attempts": gym.spaces.Tuple([gym.spaces.Discrete(self.n_steps)] * n_machines),
"successes": gym.spaces.Tuple([gym.spaces.Discrete(self.n_steps)] * n_machines),
}
)
self.reward_range = (0.0, 1.0)
7 changes: 2 additions & 5 deletions river/bandit/exp3.py
@@ -77,9 +77,7 @@ def __init__(
burn_in=0,
seed: int | None = None,
):
super().__init__(
reward_obj=reward_obj, reward_scaler=reward_scaler, burn_in=burn_in
)
super().__init__(reward_obj=reward_obj, reward_scaler=reward_scaler, burn_in=burn_in)
self.seed = seed
self.gamma = gamma
self._rng = random.Random(seed)
@@ -91,8 +89,7 @@
def _pull(self, arm_ids):
total = sum(self._weights[arm_id] for arm_id in arm_ids)
self._probabilities = {
arm_id: (1 - self.gamma) * (self._weights[arm_id] / total)
+ self.gamma / len(arm_ids)
arm_id: (1 - self.gamma) * (self._weights[arm_id] / total) + self.gamma / len(arm_ids)
for arm_id in arm_ids
}
return self._rng.choices(arm_ids, weights=self._probabilities.values())[0]
4 changes: 1 addition & 3 deletions river/bandit/ucb.py
@@ -77,9 +77,7 @@ def __init__(
burn_in=0,
seed: int = None,
):
super().__init__(
reward_obj=reward_obj, reward_scaler=reward_scaler, burn_in=burn_in
)
super().__init__(reward_obj=reward_obj, reward_scaler=reward_scaler, burn_in=burn_in)
self.delta = delta
self.seed = seed
self._rng = random.Random(seed)
4 changes: 1 addition & 3 deletions river/cluster/clustream.py
@@ -349,7 +349,5 @@ def inverse_error(x):

def __iadd__(self, other: CluStreamMicroCluster):
self.var_time += other.var_time
self.var_x = {
k: self.var_x[k] + other.var_x.get(k, stats.Var()) for k in self.var_x
}
self.var_x = {k: self.var_x[k] + other.var_x.get(k, stats.Var()) for k in self.var_x}
return self
32 changes: 16 additions & 16 deletions river/cluster/test_dbstream.py
@@ -134,8 +134,7 @@ def test_density_graph_with_three_micro_clusters():


def test_density_graph_with_removed_microcluster():
dbstream = build_dbstream(fading_factor=0.1,
intersection_factor=0.3)
dbstream = build_dbstream(fading_factor=0.1, intersection_factor=0.3)

add_cluster(dbstream, initial_point={1: 1, 2: 1}, move_towards={1: 1.7, 2: 1.7}, times=25)
add_cluster(dbstream, initial_point={1: 3, 2: 3}, move_towards={1: 2.3, 2: 2.3}, times=25)
@@ -162,21 +161,17 @@ def test_density_graph_with_removed_microcluster():

dbstream._recluster()
assert len(dbstream.clusters) == 1
assert_micro_cluster_properties(
dbstream.clusters[0], center={1: 2.560647, 2: 2.560647}
)
assert_micro_cluster_properties(dbstream.clusters[0], center={1: 2.560647, 2: 2.560647})


def test_dbstream_synthetic_sklearn():
centers = [(-10, -10), (-5, -5), (0, 0), (5, 5), (10, 10)]
cluster_std = [0.6] * 5

# Create a dataset with 15000 data points with 5 centers and cluster SD of 0.6 each
X, y = make_blobs(n_samples=15_000,
cluster_std=cluster_std,
centers=centers,
n_features=2,
random_state=42)
X, y = make_blobs(
n_samples=15_000, cluster_std=cluster_std, centers=centers, n_features=2, random_state=42
)

dbstream = DBSTREAM(
clustering_threshold=2,
@@ -202,11 +197,16 @@ def test_dbstream_synthetic_sklearn():
dbstream._recluster()

# Check that the resulted cluster centers are close to the expected centers
dbstream_expected_centers = {0: {0: 10, 1: 10},
1: {0: -5, 1: -5},
2: {0: 0, 1: 0},
3: {0: 5, 1: 5},
4: {0: -10, 1: -10}}
dbstream_expected_centers = {
0: {0: 10, 1: 10},
1: {0: -5, 1: -5},
2: {0: 0, 1: 0},
3: {0: 5, 1: 5},
4: {0: -10, 1: -10},
}

for i in dbstream.centers.keys():
assert utils.math.minkowski_distance(dbstream.centers[i], dbstream_expected_centers[i], 2) < 0.2
assert (
utils.math.minkowski_distance(dbstream.centers[i], dbstream_expected_centers[i], 2)
< 0.2
)
43 changes: 10 additions & 33 deletions river/cluster/textclust.py
@@ -260,9 +260,7 @@ def _get_closest_mc(self, mc, idf, distance):
if counter > 1:
## our threshold
mu = (sumdist - min_dist) / (counter - 1)
threshold = mu - self.sigma * math.sqrt(
squaresum / (counter - 1) - mu**2
)
threshold = mu - self.sigma * math.sqrt(squaresum / (counter - 1) - mu**2)

if min_dist < threshold:
clusterId = smallest_key
@@ -288,9 +286,7 @@ def _calculateIDF(self, micro_clusters):
# update weights according to the fading factor
def _updateweights(self):
for micro in self.micro_clusters.values():
micro.fade(
self.t, self.omega, self.fading_factor, self.term_fading, self.realtime
)
micro.fade(self.t, self.omega, self.fading_factor, self.term_fading, self.realtime)

# delete micro clusters with a weight smaller omega
for key in list(self.micro_clusters.keys()):
@@ -373,9 +369,7 @@ def _get_distance_matrix(self, clusters):
ids = list(clusters.keys())

# initialize all distances to 0
distances = pd.DataFrame(
np.zeros((numClusters, numClusters)), columns=ids, index=ids
)
distances = pd.DataFrame(np.zeros((numClusters, numClusters)), columns=ids, index=ids)

for idx, row in enumerate(ids):
for col in ids[idx + 1 :]:
@@ -455,10 +449,7 @@ def get_macroclusters(self):
numClusters = min([self.num_macro, len(self.micro_clusters)])

# create empty clusters
macros = {
x: self.microcluster({}, self.t, 0, self.realtime, x)
for x in range(numClusters)
}
macros = {x: self.microcluster({}, self.t, 0, self.realtime, x) for x in range(numClusters)}

# merge micro clusters to macro clusters
for key, value in self.microToMacro.items():
@@ -478,9 +469,7 @@ def get_macroclusters(self):
def showclusters(self, topn, num, type="micro"):
# first clusters are sorted according to their respective weights
if type == "micro":
sortedmicro = sorted(
self.micro_clusters.values(), key=lambda x: x.weight, reverse=True
)
sortedmicro = sorted(self.micro_clusters.values(), key=lambda x: x.weight, reverse=True)
else:
sortedmicro = sorted(
self.get_macroclusters().values(), key=lambda x: x.weight, reverse=True
@@ -510,10 +499,7 @@ def showclusters(self, topn, num, type="micro"):
]
for rep in representatives:
print(
"weight: "
+ str(round(rep[1], 2))
+ "\t token: "
+ str(rep[0]).expandtabs(10)
"weight: " + str(round(rep[1], 2)) + "\t token: " + str(rep[0]).expandtabs(10)
)
print("-------------------------------------------")

@@ -539,9 +525,7 @@ def get_assignment(self, x, type):
# identify the closest micro cluster using the predefined distance measure
for key in self.micro_clusters.keys():
if self.micro_clusters[key].weight > self.min_weight:
cur_dist = self.micro_distance.dist(
mc, self.micro_clusters[key], idf
)
cur_dist = self.micro_distance.dist(mc, self.micro_clusters[key], idf)
if cur_dist < dist:
dist = cur_dist
closest = key
@@ -616,9 +600,7 @@ def __init__(self, type):

## generic method that is called for each distance
def dist(self, m1, m2, idf):
return getattr(self, self.type, lambda: "Invalid distance measure")(
m1, m2, idf
)
return getattr(self, self.type, lambda: "Invalid distance measure")(m1, m2, idf)

##calculate cosine similarity directly and fast
def tfidf_cosine_distance(self, mc, microcluster, idf):
@@ -628,17 +610,12 @@ def tfidf_cosine_distance(self, mc, microcluster, idf):
for k in list(mc.tf.keys()):
if k in idf:
if k in microcluster.tf:
sum += (mc.tf[k]["tf"] * idf[k]) * (
microcluster.tf[k]["tf"] * idf[k]
)
sum += (mc.tf[k]["tf"] * idf[k]) * (microcluster.tf[k]["tf"] * idf[k])
tfidflen += mc.tf[k]["tf"] * idf[k] * mc.tf[k]["tf"] * idf[k]
tfidflen = math.sqrt(tfidflen)
for k in list(microcluster.tf.keys()):
microtfidflen += (
microcluster.tf[k]["tf"]
* idf[k]
* microcluster.tf[k]["tf"]
* idf[k]
microcluster.tf[k]["tf"] * idf[k] * microcluster.tf[k]["tf"] * idf[k]
)
microtfidflen = math.sqrt(microtfidflen)
if tfidflen == 0 or microtfidflen == 0:
10 changes: 2 additions & 8 deletions river/conf/jackknife.py
@@ -91,9 +91,7 @@ def __init__(

alpha = (1 - confidence_level) / 2
self._lower = (
stats.RollingQuantile(alpha, window_size)
if window_size
else stats.Quantile(alpha)
stats.RollingQuantile(alpha, window_size) if window_size else stats.Quantile(alpha)
)
self._upper = (
stats.RollingQuantile(1 - alpha, window_size)
@@ -109,11 +107,7 @@ def _wrapped_model(self):
def _unit_test_params(cls):
from river import linear_model, preprocessing

yield {
"regressor": (
preprocessing.StandardScaler() | linear_model.LinearRegression()
)
}
yield {"regressor": (preprocessing.StandardScaler() | linear_model.LinearRegression())}

def learn_one(self, x, y, **kwargs):
# Update the quantiles