run ruff on all files and apply changes.

EducationalTestingService · Jul 29, 2024 · cf82ae9 · cf82ae9
1 parent 910f33d
commit cf82ae9
Show file tree

Hide file tree

Showing 23 changed files with 140 additions and 0 deletions.
diff --git a/skll/config/__init__.py b/skll/config/__init__.py
@@ -164,6 +164,7 @@ def _find_invalid_options(self) -> Set[str]:
         -------
         invalid_options : Set[str]
             The set of invalid options specified by the user.
+
         """
         # compute a list of all the valid options
         valid_options = list(self.defaults().keys()) + self._required_options
@@ -211,6 +212,7 @@ def _find_ill_specified_options(
         the default value for the option does not result in running an
         experiment with unexpected settings, this is not really a major
         problem.
+
         """
         incorrectly_specified_options = []
         multiply_specified_options = []
@@ -252,6 +254,7 @@ def validate(self) -> None:
 
         KeyError
             If any options are not defined in the appropriate sections.
+
         """
         invalid_options = self._find_invalid_options()
         if invalid_options:
@@ -522,6 +525,7 @@ def parse_config_file(
     ValueError
         If various configuration parameters are incorrectly specified,
         or cause conflicts.
+
     """
     # ensure that a path is specified
     if not config_path:
@@ -1114,6 +1118,7 @@ def _setup_config_parser(config_path: PathOrStr, validate=True) -> SKLLConfigPar
     ------
     FileNotFoundError
         If the configuration file does not exist.
+
     """
     # initialize config parser with the given defaults
     config = SKLLConfigParser()

diff --git a/skll/config/utils.py b/skll/config/utils.py
@@ -31,6 +31,7 @@ def fix_json(json_string: str) -> str:
     -------
     str
         The normalized JSON string.
+
     """
     json_string = json_string.replace("True", "true")
     json_string = json_string.replace("False", "false")
@@ -63,6 +64,7 @@ def load_cv_folds(folds_file: PathOrStr, ids_to_floats=False) -> FoldMapping:
     ------
     ValueError
         If example IDs cannot be converted to floats and `ids_to_floats` is `True`.
+
     """
     with open(folds_file) as f:
         reader = csv.reader(f)
@@ -106,6 +108,7 @@ def locate_file(file_path: PathOrStr, config_dir: PathOrStr) -> str:
     ------
     FileNotFoundError
         If the file does not exist.
+
     """
     if not file_path:
         return ""
@@ -140,6 +143,7 @@ def _munge_featureset_name(name_or_list: Union[Iterable, str]) -> str:
     -------
     res : str
         The name components joined with '+' if the input is a list; otherwise, the name itself.
+
     """
     if isinstance(name_or_list, str):
         return name_or_list
@@ -179,6 +183,7 @@ def _parse_and_validate_metrics(metrics: str, option_name: str, logger=None) ->
 
     ValueError
         If "mean_squared_error" is specified as a metric.
+
     """
     # create a logger if one was not passed in
     if not logger:

diff --git a/skll/data/dict_vectorizer.py b/skll/data/dict_vectorizer.py
@@ -82,6 +82,7 @@ class DictVectorizer(OldDictVectorizer):
     FeatureHasher : performs vectorization using only a hash function.
     sklearn.preprocessing.OneHotEncoder : handles nominal/categorical features
       encoded as columns of integers.
+
     """
 
     def __eq__(self, other):

diff --git a/skll/data/featureset.py b/skll/data/featureset.py
@@ -56,6 +56,7 @@ class FeatureSet(object):
     -----
     If ids, labels, and/or features are not None, the number of rows in
     each array must be equal.
+
     """
 
     def __init__(
@@ -125,6 +126,7 @@ def __contains__(self, value):
         ----------
         value
             The value to check.
+
         """
         return value in self.ids
 
@@ -146,6 +148,7 @@ def __eq__(self, other):
         -----
         We consider feature values to be equal if any differences are in the
         sixth decimal place or higher.
+
         """
         return (
             self.ids.shape == other.ids.shape
@@ -218,6 +221,7 @@ def __add__(self, other: "FeatureSet") -> "FeatureSet":
 
         ValueError
             If there are conflicting labels.
+
         """
         # Check that the sets of IDs are equal
         if set(self.ids) != set(other.ids):
@@ -335,6 +339,7 @@ def filter(
         ValueError
             If attempting to use features to filter a ``FeatureSet`` that
             uses a ``FeatureHasher`` vectorizer.
+
         """
         # Construct mask that indicates which examples to keep
         mask = np.ones(len(self), dtype=bool)
@@ -430,6 +435,7 @@ def filtered_iter(
         ValueError
             If any of the "labels", "features", or "vectorizer" attribute
             is ``None``.
+
         """
         if self.features is not None and not isinstance(self.vectorizer, DictVectorizer):
             raise ValueError(
@@ -477,6 +483,7 @@ def __sub__(self, other: "FeatureSet") -> "FeatureSet":
         -------
         :class:`skll.data.featureset.FeatureSet`
             A copy of ``self`` with all features in ``other`` removed.
+
         """
         new_set = deepcopy(self)
         if other.vectorizer:
@@ -492,6 +499,7 @@ def has_labels(self):
         -------
         has_labels : bool
             Whether or not this FeatureSet has any finite labels.
+
         """
         # make sure that labels is not None or a list of Nones
         if self.labels is not None and not all(label is None for label in self.labels):
@@ -510,6 +518,7 @@ def __str__(self):
         -------
         str:
             A string representation of ``FeatureSet``.
+
         """
         return str(self.__dict__)
 
@@ -521,6 +530,7 @@ def __repr__(self):
         -------
         str:
             A string representation of ``FeatureSet``.
+
         """
         return repr(self.__dict__)
 
@@ -542,6 +552,7 @@ def __getitem__(
             If `value` is a slice, then return a new ``FeatureSet`` instance
             containing a subset of the data. If it's an index, return the
             specific example by row number.
+
         """
         # Check if we're slicing
         if isinstance(value, slice):
@@ -597,6 +608,7 @@ def split(
         -------
         Tuple[:class:`skll.data.featureset.FeatureSet`, :class:`skll.data.featureset.FeatureSet`]
             A tuple containing the two featureset instances.
+
         """
         # Note: an alternative way to implement this is to make copies
         # of the given FeatureSet instance and then use the `filter()`
@@ -655,6 +667,7 @@ def from_data_frame(
         -------
         :class:`skll.data.featureset.FeatureSet`
             A ``FeatureSet`` instance generated from the given data frame.
+
         """
         if labels_column:
             feature_columns = [column for column in df.columns if column != labels_column]

diff --git a/skll/data/readers.py b/skll/data/readers.py
@@ -118,6 +118,7 @@ class Reader(object):
     logger : Optional[logging.Logger], default=None
         A logger instance to use to log messages instead of creating
         a new one by default.
+
     """
 
     def __init__(
@@ -177,6 +178,7 @@ def for_path(cls, path_or_list: Union[PathOrStr, FeatureDictList], **kwargs) ->
         ------
         ValueError
             If file does not have a valid extension.
+
         """
         if not isinstance(path_or_list, (str, Path)):
             return DictListReader(path_or_list)
@@ -211,6 +213,7 @@ def _sub_read(self, file):
         Raises
         ------
         NotImplementedError
+
         """
         raise NotImplementedError
 
@@ -229,6 +232,7 @@ def _print_progress(self, progress_num: Union[int, str], end="\r"):
         end : str, default='\r'
             The string to put at the end of the line.  "\r" should be
             used for every update except for the final one.
+
         """
         # Print out status
         if not self.quiet:
@@ -270,6 +274,7 @@ def _sub_read_rows(self, file: PathOrStr) -> Tuple[np.ndarray, np.ndarray, Featu
 
         ValueError
             If the example IDs are not unique.
+
         """
         # Get labels and IDs
         ids_list: List[IdType] = []
@@ -365,6 +370,7 @@ def _parse_dataframe(
 
         features : :class:`skll.types.FeatureDictList`
             List of feature dictionaries.
+
         """
         if df.empty:
             raise ValueError("No features found in possibly empty file " f"'{self.path_or_list}'.")
@@ -459,6 +465,7 @@ def read(self) -> FeatureSet:
 
         ValueError
             If the example IDs are not unique.
+
         """
         self.logger.debug(f"Path: {self.path_or_list}")
 
@@ -550,6 +557,7 @@ class DictListReader(Reader):
     logger : Optional[logging.Logger], default=None
         A logger instance to use to log messages instead of creating
         a new one by default.
+
     """
 
     def read(self) -> FeatureSet:
@@ -560,6 +568,7 @@ def read(self) -> FeatureSet:
         -------
         :class:`skll.data.FeatureSet`
             A ``FeatureSet`` representing the list of dictionaries we read in.
+
         """
         # if we are in this method, `self.path_or_list` must be a
         # list of dictionaries
@@ -698,6 +707,7 @@ def _sub_read(self, file: IO[str]) -> FeatGenerator:
         ValueError
             If IDs cannot be converted to floats, and ``ids_to_floats``
             is ``True``.
+
         """
         for example_num, line in enumerate(file):
             # Remove extraneous whitespace
@@ -789,6 +799,7 @@ class LibSVMReader(Reader):
     logger : Optional[logging.Logger], default=None
         A logger instance to use to log messages instead of creating
         a new one by default.
+
     """
 
     line_regex = re.compile(
@@ -827,6 +838,7 @@ def _pair_to_tuple(pair: str, feat_map: Dict[str, str]) -> Tuple[str, Union[floa
             The name of the feature.
         value : Union[float, int, str]
             The value of the example.
+
         """
         name, value = pair.split(":")
         if feat_map is not None:
@@ -859,6 +871,7 @@ def _sub_read(self, file: IO[str]) -> FeatGenerator:
         ------
         ValueError
             If line does not look like valid libsvm format.
+
         """
         feat_map: Optional[Dict[str, str]]
         for example_num, line in enumerate(file):
@@ -952,6 +965,7 @@ class CSVReader(Reader):
 
     kwargs : Optional[Dict[str, Any]]
         Other arguments to the Reader object.
+
     """
 
     def __init__(
@@ -990,6 +1004,7 @@ def _sub_read(self, file: PathOrStr) -> Tuple[np.ndarray, np.ndarray, FeatureDic
 
         features : :class:`skll.types.FeatureDictList`
             The list of feature dictionaries for the feature set.
+
         """
         df = pd.read_csv(file, sep=self._sep, engine=self._engine, **self._pandas_kwargs)
         return self._parse_dataframe(
@@ -1035,6 +1050,7 @@ class TSVReader(CSVReader):
 
     kwargs : Optional[Dict[str, Any]]
         Other arguments to the Reader object.
+
     """
 
     def __init__(
@@ -1072,6 +1088,7 @@ class ARFFReader(Reader):
 
     kwargs : Optional[Dict[str, Any]]
         Other arguments to the Reader object.
+
     """
 
     def __init__(self, path_or_list: Union[PathOrStr, List[Dict[str, Any]]], **kwargs):
@@ -1101,6 +1118,7 @@ def split_with_quotes(
 
         escape_char : str, default='\\'
             The escape character.
+
         """
         return next(
             csv.reader([string], delimiter=delimiter, quotechar=quote_char, escapechar=escape_char)
@@ -1125,6 +1143,7 @@ def _sub_read(self, file: IO[str]) -> FeatGenerator:
 
         example : :class:`skll.types.FeatureDict`
             The example features in dictionary format.
+
         """
         field_names = []
         # Process ARFF header
@@ -1235,6 +1254,7 @@ def safe_float(
     Union[float, int, str]
         The text value converted to int or float, if possible. Otherwise
         it's a string.
+
     """
     # convert to str to be "Safe"!
     text = str(text)