Skip to content

Commit

Permalink
run ruff on all files and apply changes.
Browse files Browse the repository at this point in the history
  • Loading branch information
Tamar Lavee committed Jul 29, 2024
1 parent 910f33d commit cf82ae9
Show file tree
Hide file tree
Showing 23 changed files with 140 additions and 0 deletions.
5 changes: 5 additions & 0 deletions skll/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ def _find_invalid_options(self) -> Set[str]:
-------
invalid_options : Set[str]
The set of invalid options specified by the user.
"""
# compute a list of all the valid options
valid_options = list(self.defaults().keys()) + self._required_options
Expand Down Expand Up @@ -211,6 +212,7 @@ def _find_ill_specified_options(
the default value for the option does not result in running an
experiment with unexpected settings, this is not really a major
problem.
"""
incorrectly_specified_options = []
multiply_specified_options = []
Expand Down Expand Up @@ -252,6 +254,7 @@ def validate(self) -> None:
KeyError
If any options are not defined in the appropriate sections.
"""
invalid_options = self._find_invalid_options()
if invalid_options:
Expand Down Expand Up @@ -522,6 +525,7 @@ def parse_config_file(
ValueError
If various configuration parameters are incorrectly specified,
or cause conflicts.
"""
# ensure that a path is specified
if not config_path:
Expand Down Expand Up @@ -1114,6 +1118,7 @@ def _setup_config_parser(config_path: PathOrStr, validate=True) -> SKLLConfigPar
------
FileNotFoundError
If the configuration file does not exist.
"""
# initialize config parser with the given defaults
config = SKLLConfigParser()
Expand Down
5 changes: 5 additions & 0 deletions skll/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def fix_json(json_string: str) -> str:
-------
str
The normalized JSON string.
"""
json_string = json_string.replace("True", "true")
json_string = json_string.replace("False", "false")
Expand Down Expand Up @@ -63,6 +64,7 @@ def load_cv_folds(folds_file: PathOrStr, ids_to_floats=False) -> FoldMapping:
------
ValueError
If example IDs cannot be converted to floats and `ids_to_floats` is `True`.
"""
with open(folds_file) as f:
reader = csv.reader(f)
Expand Down Expand Up @@ -106,6 +108,7 @@ def locate_file(file_path: PathOrStr, config_dir: PathOrStr) -> str:
------
FileNotFoundError
If the file does not exist.
"""
if not file_path:
return ""
Expand Down Expand Up @@ -140,6 +143,7 @@ def _munge_featureset_name(name_or_list: Union[Iterable, str]) -> str:
-------
res : str
The name components joined with '+' if the input is a list; otherwise, the name itself.
"""
if isinstance(name_or_list, str):
return name_or_list
Expand Down Expand Up @@ -179,6 +183,7 @@ def _parse_and_validate_metrics(metrics: str, option_name: str, logger=None) ->
ValueError
If "mean_squared_error" is specified as a metric.
"""
# create a logger if one was not passed in
if not logger:
Expand Down
1 change: 1 addition & 0 deletions skll/data/dict_vectorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class DictVectorizer(OldDictVectorizer):
FeatureHasher : performs vectorization using only a hash function.
sklearn.preprocessing.OneHotEncoder : handles nominal/categorical features
encoded as columns of integers.
"""

def __eq__(self, other):
Expand Down
13 changes: 13 additions & 0 deletions skll/data/featureset.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class FeatureSet(object):
-----
If ids, labels, and/or features are not None, the number of rows in
each array must be equal.
"""

def __init__(
Expand Down Expand Up @@ -125,6 +126,7 @@ def __contains__(self, value):
----------
value
The value to check.
"""
return value in self.ids

Expand All @@ -146,6 +148,7 @@ def __eq__(self, other):
-----
We consider feature values to be equal if any differences are in the
sixth decimal place or higher.
"""
return (
self.ids.shape == other.ids.shape
Expand Down Expand Up @@ -218,6 +221,7 @@ def __add__(self, other: "FeatureSet") -> "FeatureSet":
ValueError
If there are conflicting labels.
"""
# Check that the sets of IDs are equal
if set(self.ids) != set(other.ids):
Expand Down Expand Up @@ -335,6 +339,7 @@ def filter(
ValueError
If attempting to use features to filter a ``FeatureSet`` that
uses a ``FeatureHasher`` vectorizer.
"""
# Construct mask that indicates which examples to keep
mask = np.ones(len(self), dtype=bool)
Expand Down Expand Up @@ -430,6 +435,7 @@ def filtered_iter(
ValueError
If any of the "labels", "features", or "vectorizer" attribute
is ``None``.
"""
if self.features is not None and not isinstance(self.vectorizer, DictVectorizer):
raise ValueError(
Expand Down Expand Up @@ -477,6 +483,7 @@ def __sub__(self, other: "FeatureSet") -> "FeatureSet":
-------
:class:`skll.data.featureset.FeatureSet`
A copy of ``self`` with all features in ``other`` removed.
"""
new_set = deepcopy(self)
if other.vectorizer:
Expand All @@ -492,6 +499,7 @@ def has_labels(self):
-------
has_labels : bool
Whether or not this FeatureSet has any finite labels.
"""
# make sure that labels is not None or a list of Nones
if self.labels is not None and not all(label is None for label in self.labels):
Expand All @@ -510,6 +518,7 @@ def __str__(self):
-------
str:
A string representation of ``FeatureSet``.
"""
return str(self.__dict__)

Expand All @@ -521,6 +530,7 @@ def __repr__(self):
-------
str:
A string representation of ``FeatureSet``.
"""
return repr(self.__dict__)

Expand All @@ -542,6 +552,7 @@ def __getitem__(
If `value` is a slice, then return a new ``FeatureSet`` instance
containing a subset of the data. If it's an index, return the
specific example by row number.
"""
# Check if we're slicing
if isinstance(value, slice):
Expand Down Expand Up @@ -597,6 +608,7 @@ def split(
-------
Tuple[:class:`skll.data.featureset.FeatureSet`, :class:`skll.data.featureset.FeatureSet`]
A tuple containing the two featureset instances.
"""
# Note: an alternative way to implement this is to make copies
# of the given FeatureSet instance and then use the `filter()`
Expand Down Expand Up @@ -655,6 +667,7 @@ def from_data_frame(
-------
:class:`skll.data.featureset.FeatureSet`
A ``FeatureSet`` instance generated from the given data frame.
"""
if labels_column:
feature_columns = [column for column in df.columns if column != labels_column]
Expand Down
20 changes: 20 additions & 0 deletions skll/data/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ class Reader(object):
logger : Optional[logging.Logger], default=None
A logger instance to use to log messages instead of creating
a new one by default.
"""

def __init__(
Expand Down Expand Up @@ -177,6 +178,7 @@ def for_path(cls, path_or_list: Union[PathOrStr, FeatureDictList], **kwargs) ->
------
ValueError
If file does not have a valid extension.
"""
if not isinstance(path_or_list, (str, Path)):
return DictListReader(path_or_list)
Expand Down Expand Up @@ -211,6 +213,7 @@ def _sub_read(self, file):
Raises
------
NotImplementedError
"""
raise NotImplementedError

Expand All @@ -229,6 +232,7 @@ def _print_progress(self, progress_num: Union[int, str], end="\r"):
end : str, default='\r'
The string to put at the end of the line. "\r" should be
used for every update except for the final one.
"""
# Print out status
if not self.quiet:
Expand Down Expand Up @@ -270,6 +274,7 @@ def _sub_read_rows(self, file: PathOrStr) -> Tuple[np.ndarray, np.ndarray, Featu
ValueError
If the example IDs are not unique.
"""
# Get labels and IDs
ids_list: List[IdType] = []
Expand Down Expand Up @@ -365,6 +370,7 @@ def _parse_dataframe(
features : :class:`skll.types.FeatureDictList`
List of feature dictionaries.
"""
if df.empty:
raise ValueError("No features found in possibly empty file " f"'{self.path_or_list}'.")
Expand Down Expand Up @@ -459,6 +465,7 @@ def read(self) -> FeatureSet:
ValueError
If the example IDs are not unique.
"""
self.logger.debug(f"Path: {self.path_or_list}")

Expand Down Expand Up @@ -550,6 +557,7 @@ class DictListReader(Reader):
logger : Optional[logging.Logger], default=None
A logger instance to use to log messages instead of creating
a new one by default.
"""

def read(self) -> FeatureSet:
Expand All @@ -560,6 +568,7 @@ def read(self) -> FeatureSet:
-------
:class:`skll.data.FeatureSet`
A ``FeatureSet`` representing the list of dictionaries we read in.
"""
# if we are in this method, `self.path_or_list` must be a
# list of dictionaries
Expand Down Expand Up @@ -698,6 +707,7 @@ def _sub_read(self, file: IO[str]) -> FeatGenerator:
ValueError
If IDs cannot be converted to floats, and ``ids_to_floats``
is ``True``.
"""
for example_num, line in enumerate(file):
# Remove extraneous whitespace
Expand Down Expand Up @@ -789,6 +799,7 @@ class LibSVMReader(Reader):
logger : Optional[logging.Logger], default=None
A logger instance to use to log messages instead of creating
a new one by default.
"""

line_regex = re.compile(
Expand Down Expand Up @@ -827,6 +838,7 @@ def _pair_to_tuple(pair: str, feat_map: Dict[str, str]) -> Tuple[str, Union[floa
The name of the feature.
value : Union[float, int, str]
The value of the feature.
"""
name, value = pair.split(":")
if feat_map is not None:
Expand Down Expand Up @@ -859,6 +871,7 @@ def _sub_read(self, file: IO[str]) -> FeatGenerator:
------
ValueError
If line does not look like valid libsvm format.
"""
feat_map: Optional[Dict[str, str]]
for example_num, line in enumerate(file):
Expand Down Expand Up @@ -952,6 +965,7 @@ class CSVReader(Reader):
kwargs : Optional[Dict[str, Any]]
Other arguments to the Reader object.
"""

def __init__(
Expand Down Expand Up @@ -990,6 +1004,7 @@ def _sub_read(self, file: PathOrStr) -> Tuple[np.ndarray, np.ndarray, FeatureDic
features : :class:`skll.types.FeatureDictList`
The list of feature dictionaries for the feature set.
"""
df = pd.read_csv(file, sep=self._sep, engine=self._engine, **self._pandas_kwargs)
return self._parse_dataframe(
Expand Down Expand Up @@ -1035,6 +1050,7 @@ class TSVReader(CSVReader):
kwargs : Optional[Dict[str, Any]]
Other arguments to the Reader object.
"""

def __init__(
Expand Down Expand Up @@ -1072,6 +1088,7 @@ class ARFFReader(Reader):
kwargs : Optional[Dict[str, Any]]
Other arguments to the Reader object.
"""

def __init__(self, path_or_list: Union[PathOrStr, List[Dict[str, Any]]], **kwargs):
Expand Down Expand Up @@ -1101,6 +1118,7 @@ def split_with_quotes(
escape_char : str, default='\\'
The escape character.
"""
return next(
csv.reader([string], delimiter=delimiter, quotechar=quote_char, escapechar=escape_char)
Expand All @@ -1125,6 +1143,7 @@ def _sub_read(self, file: IO[str]) -> FeatGenerator:
example : :class:`skll.types.FeatureDict`
The example features in dictionary format.
"""
field_names = []
# Process ARFF header
Expand Down Expand Up @@ -1235,6 +1254,7 @@ def safe_float(
Union[float, int, str]
The text value converted to int or float, if possible. Otherwise
it's a string.
"""
# convert to str to be "Safe"!
text = str(text)
Expand Down
Loading

0 comments on commit cf82ae9

Please sign in to comment.