diff --git a/docs/changes.rst b/docs/changes.rst
index 90691b76..7e208380 100644
--- a/docs/changes.rst
+++ b/docs/changes.rst
@@ -19,7 +19,8 @@ v0.4
- Add `model_format` argument to :meth:`skops.hub_utils.init` to be stored in
`config.json` so that we know how to load a model from the repository.
:pr:`242` by `Merve Noyan`_.
-
+- Persistence now supports bytes and bytearrays, added tests to verify that
+ LightGBM, XGBoost, and CatBoost work now. :pr:`244` by `Benjamin Bossan`_.
v0.3
----
diff --git a/docs/persistence.rst b/docs/persistence.rst
index 7fced29b..19959f85 100644
--- a/docs/persistence.rst
+++ b/docs/persistence.rst
@@ -87,6 +87,26 @@ means if you have custom functions (say, a custom function to be used with
most ``numpy`` and ``scipy`` functions should work. Therefore, you can actually
save built-in functions like ``numpy.sqrt``.
+Supported libraries
+-------------------
+
+Skops intends to support all of **scikit-learn**, that is, not only its
+estimators, but also other classes like cross validation splitters. Furthermore,
+most types from **numpy** and **scipy** should be supported, such as (sparse)
+arrays, dtypes, random generators, and ufuncs.
+
+Apart from this core, we plan to support machine learning libraries commonly
+used by the community. So far, those are:
+
+- `LightGBM <https://lightgbm.readthedocs.io/>`_ (scikit-learn API)
+- `XGBoost <https://xgboost.readthedocs.io/>`_ (scikit-learn API)
+- `CatBoost <https://catboost.ai/>`_
+
+If you run into a problem using any of the mentioned libraries, this could mean
+there is a bug in skops. Please open an issue on `our issue tracker
+<https://github.com/skops-dev/skops/issues>`_ (but please check first if a
+corresponding issue already exists).
+
Roadmap
-------
diff --git a/skops/_min_dependencies.py b/skops/_min_dependencies.py
index a3f1ced7..a60579c4 100644
--- a/skops/_min_dependencies.py
+++ b/skops/_min_dependencies.py
@@ -27,6 +27,10 @@
"matplotlib": ("3.3", "docs, tests", None),
"pandas": ("1", "docs, tests", None),
"typing_extensions": ("3.7", "install", "python_full_version < '3.8'"),
+ # required for persistence tests of external libraries
+ "lightgbm": ("3", "tests", None),
+ "xgboost": ("1.6", "tests", None),
+ "catboost": ("1.0", "tests", None),
}
diff --git a/skops/io/_general.py b/skops/io/_general.py
index 126bcfaf..10ef9a0f 100644
--- a/skops/io/_general.py
+++ b/skops/io/_general.py
@@ -1,6 +1,8 @@
from __future__ import annotations
+import io
import json
+import uuid
from functools import partial
from types import FunctionType, MethodType
from typing import Any, Sequence
@@ -475,12 +477,64 @@ def _construct(self):
return json.loads(self.content)
+def bytes_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]:
+ f_name = f"{uuid.uuid4()}.bin"
+ save_context.zip_file.writestr(f_name, obj)
+ res = {
+ "__class__": obj.__class__.__name__,
+ "__module__": get_module(type(obj)),
+ "__loader__": "BytesNode",
+ "file": f_name,
+ }
+ return res
+
+
+def bytearray_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]:
+ res = bytes_get_state(obj, save_context)
+ res["__loader__"] = "BytearrayNode"
+ return res
+
+
+class BytesNode(Node):
+ def __init__(
+ self,
+ state: dict[str, Any],
+ load_context: LoadContext,
+ trusted: bool | Sequence[str] = False,
+ ) -> None:
+ super().__init__(state, load_context, trusted)
+ self.trusted = self._get_trusted(trusted, [bytes])
+ self.children = {"content": io.BytesIO(load_context.src.read(state["file"]))}
+
+ def _construct(self):
+ content = self.children["content"].getvalue()
+ return content
+
+
+class BytearrayNode(BytesNode):
+ def __init__(
+ self,
+ state: dict[str, Any],
+ load_context: LoadContext,
+ trusted: bool | Sequence[str] = False,
+ ) -> None:
+ super().__init__(state, load_context, trusted)
+ self.trusted = self._get_trusted(trusted, [bytearray])
+
+ def _construct(self):
+ content_bytes = super()._construct()
+ content_bytearray = bytearray(list(content_bytes))
+ return content_bytearray
+
+
# tuples of type and function that gets the state of that type
GET_STATE_DISPATCH_FUNCTIONS = [
(dict, dict_get_state),
(list, list_get_state),
(set, set_get_state),
(tuple, tuple_get_state),
+ (bytes, bytes_get_state),
+ (bytearray, bytearray_get_state),
(slice, slice_get_state),
(FunctionType, function_get_state),
(MethodType, method_get_state),
@@ -494,6 +548,8 @@ def _construct(self):
"ListNode": ListNode,
"SetNode": SetNode,
"TupleNode": TupleNode,
+ "BytesNode": BytesNode,
+ "BytearrayNode": BytearrayNode,
"SliceNode": SliceNode,
"FunctionNode": FunctionNode,
"MethodNode": MethodNode,
diff --git a/skops/io/tests/_utils.py b/skops/io/tests/_utils.py
new file mode 100644
index 00000000..ead14b29
--- /dev/null
+++ b/skops/io/tests/_utils.py
@@ -0,0 +1,170 @@
+import sys
+import warnings
+
+import numpy as np
+from scipy import sparse
+from sklearn.base import BaseEstimator
+from sklearn.utils._testing import assert_allclose_dense_sparse
+
+# TODO: Investigate why that seems to be an issue on MacOS (only observed with
+# Python 3.8)
+ATOL = 1e-6 if sys.platform == "darwin" else 1e-7
+
+
+def _is_steps_like(obj):
+ # helper function to check if an object is something like Pipeline.steps,
+ # i.e. a list of tuples of names and estimators
+ if not isinstance(obj, list): # must be a list
+ return False
+
+ if not obj: # must not be empty
+ return False
+
+ if not isinstance(obj[0], tuple): # must be list of tuples
+ return False
+
+ lens = set(map(len, obj))
+ if not lens == {2}: # all elements must be length 2 tuples
+ return False
+
+ keys, vals = list(zip(*obj))
+
+ if len(keys) != len(set(keys)): # keys must be unique
+ return False
+
+ if not all(map(lambda x: isinstance(x, (type(None), BaseEstimator)), vals)):
+ # values must be BaseEstimators or None
+ return False
+
+ return True
+
+
+def _assert_generic_objects_equal(val1, val2):
+ def _is_builtin(val):
+ # Check if value is a builtin type
+ return getattr(getattr(val, "__class__", {}), "__module__", None) == "builtins"
+
+ if isinstance(val1, (list, tuple, np.ndarray)):
+ assert len(val1) == len(val2)
+ for subval1, subval2 in zip(val1, val2):
+ _assert_generic_objects_equal(subval1, subval2)
+ return
+
+ assert type(val1) == type(val2)
+ if hasattr(val1, "__dict__"):
+ assert_params_equal(val1.__dict__, val2.__dict__)
+ elif _is_builtin(val1):
+ assert val1 == val2
+ else:
+ # not a normal Python class, could be e.g. a Cython class
+ assert val1.__reduce__() == val2.__reduce__()
+
+
+def _assert_tuples_equal(val1, val2):
+ assert len(val1) == len(val2)
+ for subval1, subval2 in zip(val1, val2):
+ _assert_vals_equal(subval1, subval2)
+
+
+def _assert_vals_equal(val1, val2):
+ if hasattr(val1, "__getstate__"):
+ # This includes BaseEstimator since they implement __getstate__ and
+ # that returns the parameters as well.
+ #
+ # Some objects return a tuple of parameters, others a dict.
+ state1 = val1.__getstate__()
+ state2 = val2.__getstate__()
+ assert type(state1) == type(state2)
+ if isinstance(state1, tuple):
+ _assert_tuples_equal(state1, state2)
+ else:
+ assert_params_equal(val1.__getstate__(), val2.__getstate__())
+ elif sparse.issparse(val1):
+ assert sparse.issparse(val2) and ((val1 - val2).nnz == 0)
+ elif isinstance(val1, (np.ndarray, np.generic)):
+ if len(val1.dtype) == 0:
+ # for arrays with at least 2 dimensions, check that contiguity is
+ # preserved
+ if val1.squeeze().ndim > 1:
+ assert val1.flags["C_CONTIGUOUS"] is val2.flags["C_CONTIGUOUS"]
+ assert val1.flags["F_CONTIGUOUS"] is val2.flags["F_CONTIGUOUS"]
+ if val1.dtype == object:
+ assert val2.dtype == object
+ assert val1.shape == val2.shape
+ for subval1, subval2 in zip(val1, val2):
+ _assert_generic_objects_equal(subval1, subval2)
+ else:
+ # simple comparison of arrays with simple dtypes, almost all
+ # arrays are of this sort.
+ np.testing.assert_array_equal(val1, val2)
+ elif len(val1.shape) == 1:
+ # comparing arrays with structured dtypes, but they have to be 1D
+ # arrays. This is what we get from the Tree's state.
+ assert np.all([x == y for x, y in zip(val1, val2)])
+ else:
+ # we don't know what to do with these values, for now.
+ assert False
+ elif isinstance(val1, (tuple, list)):
+ assert len(val1) == len(val2)
+ for subval1, subval2 in zip(val1, val2):
+ _assert_vals_equal(subval1, subval2)
+ elif isinstance(val1, float) and np.isnan(val1):
+ assert np.isnan(val2)
+ elif isinstance(val1, dict):
+ # dictionaries are compared by comparing their values recursively.
+ assert set(val1.keys()) == set(val2.keys())
+ for key in val1:
+ _assert_vals_equal(val1[key], val2[key])
+ elif hasattr(val1, "__dict__") and hasattr(val2, "__dict__"):
+ _assert_vals_equal(val1.__dict__, val2.__dict__)
+ elif isinstance(val1, np.ufunc):
+ assert val1 == val2
+ elif val1.__class__.__module__ == "builtins":
+ assert val1 == val2
+ else:
+ _assert_generic_objects_equal(val1, val2)
+
+
+def assert_params_equal(params1, params2):
+ # helper function to compare estimator dictionaries of parameters
+ assert len(params1) == len(params2)
+ assert set(params1.keys()) == set(params2.keys())
+ for key in params1:
+ with warnings.catch_warnings():
+ # this is to silence the deprecation warning from _DictWithDeprecatedKeys
+ warnings.filterwarnings("ignore", category=FutureWarning, module="sklearn")
+ val1, val2 = params1[key], params2[key]
+ assert type(val1) == type(val2)
+
+ if _is_steps_like(val1):
+ # Deal with Pipeline.steps, FeatureUnion.transformer_list, etc.
+ assert _is_steps_like(val2)
+ val1, val2 = dict(val1), dict(val2)
+
+ if isinstance(val1, (tuple, list)):
+ assert len(val1) == len(val2)
+ for subval1, subval2 in zip(val1, val2):
+ _assert_vals_equal(subval1, subval2)
+ elif isinstance(val1, dict):
+ assert_params_equal(val1, val2)
+ else:
+ _assert_vals_equal(val1, val2)
+
+
+def assert_method_outputs_equal(estimator, loaded, X):
+ # helper function that checks the output of all supported methods
+ for method in [
+ "predict",
+ "predict_proba",
+ "decision_function",
+ "transform",
+ "predict_log_proba",
+ ]:
+ err_msg = (
+ f"{estimator.__class__.__name__}.{method}() doesn't produce the same"
+ " results after loading the persisted model."
+ )
+ if hasattr(estimator, method):
+ X_out1 = getattr(estimator, method)(X)
+ X_out2 = getattr(loaded, method)(X)
+ assert_allclose_dense_sparse(X_out1, X_out2, err_msg=err_msg, atol=ATOL)
diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py
new file mode 100644
index 00000000..fc58f5f4
--- /dev/null
+++ b/skops/io/tests/test_external.py
@@ -0,0 +1,314 @@
+"""Test persistence of "external" packages
+
+Packages that are not builtins, standard lib, numpy, scipy, or scikit-learn.
+
+"""
+
+import pytest
+from sklearn.datasets import make_classification, make_regression
+
+from skops.io import dumps, loads
+from skops.io.tests._utils import assert_method_outputs_equal, assert_params_equal
+
+# Default settings for generated data
+N_SAMPLES = 30
+N_FEATURES = 10
+N_CLASSES = 4 # for classification only
+
+
+@pytest.fixture(scope="module")
+def clf_data():
+ X, y = make_classification(
+ n_samples=N_SAMPLES,
+ n_classes=N_CLASSES,
+ n_features=N_FEATURES,
+ random_state=0,
+ n_redundant=1,
+ n_informative=N_FEATURES - 1,
+ )
+ return X, y
+
+
+@pytest.fixture(scope="module")
+def regr_data():
+ X, y = make_regression(n_samples=N_SAMPLES, n_features=N_FEATURES, random_state=0)
+ return X, y
+
+
+@pytest.fixture(scope="module")
+def rank_data(clf_data):
+ X, y = clf_data
+ group = [10 for _ in range(N_SAMPLES // 10)]
+ n = sum(group)
+ if N_SAMPLES > n:
+ group[-1] += N_SAMPLES - n
+ assert sum(group) == N_SAMPLES
+ return X, y, group
+
+
+class TestLightGBM:
+ """Tests for LGBMClassifier, LGBMRegressor, LGBMRanker"""
+
+ @pytest.fixture(autouse=True)
+ def lgbm(self):
+ lgbm = pytest.importorskip("lightgbm")
+ return lgbm
+
+ @pytest.fixture
+ def trusted(self):
+ # TODO: adjust once more types are trusted by default
+ return [
+ "collections.defaultdict",
+ "lightgbm.basic.Booster",
+ "lightgbm.sklearn.LGBMClassifier",
+ "lightgbm.sklearn.LGBMRegressor",
+ "lightgbm.sklearn.LGBMRanker",
+ "numpy.int32",
+ "numpy.int64",
+ "sklearn.preprocessing._label.LabelEncoder",
+ ]
+
+ boosting_types = ["gbdt", "dart", "goss", "rf"]
+
+ @pytest.mark.parametrize("boosting_type", boosting_types)
+ def test_classifier(self, lgbm, clf_data, trusted, boosting_type):
+ kw = {}
+ if boosting_type == "rf":
+ kw["bagging_fraction"] = 0.5
+ kw["bagging_freq"] = 2
+
+ estimator = lgbm.LGBMClassifier(boosting_type=boosting_type, **kw)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_params_equal(estimator.get_params(), loaded.get_params())
+
+ X, y = clf_data
+ estimator.fit(X, y)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_method_outputs_equal(estimator, loaded, X)
+
+ @pytest.mark.parametrize("boosting_type", boosting_types)
+ def test_regressor(self, lgbm, regr_data, trusted, boosting_type):
+ kw = {}
+ if boosting_type == "rf":
+ kw["bagging_fraction"] = 0.5
+ kw["bagging_freq"] = 2
+
+ estimator = lgbm.LGBMRegressor(boosting_type=boosting_type, **kw)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_params_equal(estimator.get_params(), loaded.get_params())
+
+ X, y = regr_data
+ estimator.fit(X, y)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_method_outputs_equal(estimator, loaded, X)
+
+ @pytest.mark.parametrize("boosting_type", boosting_types)
+ def test_ranker(self, lgbm, rank_data, trusted, boosting_type):
+ kw = {}
+ if boosting_type == "rf":
+ kw["bagging_fraction"] = 0.5
+ kw["bagging_freq"] = 2
+
+ estimator = lgbm.LGBMRanker(boosting_type=boosting_type, **kw)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_params_equal(estimator.get_params(), loaded.get_params())
+
+ X, y, group = rank_data
+ estimator.fit(X, y, group=group)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_method_outputs_equal(estimator, loaded, X)
+
+
+class TestXGBoost:
+ """Tests for XGBClassifier, XGBRegressor, XGBRFClassifier, XGBRFRegressor, XGBRanker
+
+ Known bugs:
+
+    - When initializing with tree_method=None, its value resolves to "exact", but
+ after loading, it resolves to "auto" when calling get_params().
+ - When initializing with tree_method='gpu_hist' and gpu_id=None, the
+ latter's value resolves to 0, but after loading, it resolves to -1, when
+ calling get_params()
+
+ These discrepancies occur regardless of skops, so they're a problem in
+ xgboost itself. We assume that this has no practical consequences and thus
+ avoid testing these cases. See https://github.com/dmlc/xgboost/issues/8596
+
+ """
+
+ @pytest.fixture(autouse=True)
+ def xgboost(self):
+ xgboost = pytest.importorskip("xgboost")
+ return xgboost
+
+ @pytest.fixture
+ def trusted(self):
+ # TODO: adjust once more types are trusted by default
+ return [
+ "xgboost.sklearn.XGBClassifier",
+ "xgboost.sklearn.XGBRegressor",
+ "xgboost.sklearn.XGBRFClassifier",
+ "xgboost.sklearn.XGBRFRegressor",
+ "xgboost.sklearn.XGBRanker",
+ "builtins.bytearray",
+ "xgboost.core.Booster",
+ ]
+
+ boosters = ["gbtree", "gblinear", "dart"]
+ tree_methods = ["approx", "hist", "auto"]
+
+ @pytest.mark.parametrize("booster", boosters)
+ @pytest.mark.parametrize("tree_method", tree_methods)
+ def test_classifier(self, xgboost, clf_data, trusted, booster, tree_method):
+ if (booster == "gblinear") and (tree_method != "approx"):
+ # This parameter combination is not supported in XGBoost
+ return
+
+ estimator = xgboost.XGBClassifier(booster=booster, tree_method=tree_method)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_params_equal(estimator.get_params(), loaded.get_params())
+
+ X, y = clf_data
+ estimator.fit(X, y)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_method_outputs_equal(estimator, loaded, X)
+
+ @pytest.mark.parametrize("booster", boosters)
+ @pytest.mark.parametrize("tree_method", tree_methods)
+ def test_regressor(self, xgboost, regr_data, trusted, booster, tree_method):
+ if (booster == "gblinear") and (tree_method != "approx"):
+ # This parameter combination is not supported in XGBoost
+ return
+
+ estimator = xgboost.XGBRegressor(booster=booster, tree_method=tree_method)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_params_equal(estimator.get_params(), loaded.get_params())
+
+ X, y = regr_data
+ estimator.fit(X, y)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_method_outputs_equal(estimator, loaded, X)
+
+ @pytest.mark.parametrize("booster", boosters)
+ @pytest.mark.parametrize("tree_method", tree_methods)
+ def test_rf_classifier(self, xgboost, clf_data, trusted, booster, tree_method):
+ if (booster == "gblinear") and (tree_method != "approx"):
+ # This parameter combination is not supported in XGBoost
+ return
+
+ estimator = xgboost.XGBRFClassifier(booster=booster, tree_method=tree_method)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_params_equal(estimator.get_params(), loaded.get_params())
+
+ X, y = clf_data
+ estimator.fit(X, y)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_method_outputs_equal(estimator, loaded, X)
+
+ @pytest.mark.parametrize("booster", boosters)
+ @pytest.mark.parametrize("tree_method", tree_methods)
+ def test_rf_regressor(self, xgboost, regr_data, trusted, booster, tree_method):
+ if (booster == "gblinear") and (tree_method != "approx"):
+ # This parameter combination is not supported in XGBoost
+ return
+
+ estimator = xgboost.XGBRFRegressor(booster=booster, tree_method=tree_method)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_params_equal(estimator.get_params(), loaded.get_params())
+
+ X, y = regr_data
+ estimator.fit(X, y)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_method_outputs_equal(estimator, loaded, X)
+
+ @pytest.mark.parametrize("booster", boosters)
+ @pytest.mark.parametrize("tree_method", tree_methods)
+ def test_ranker(self, xgboost, rank_data, trusted, booster, tree_method):
+ if (booster == "gblinear") and (tree_method != "approx"):
+ # This parameter combination is not supported in XGBoost
+ return
+
+ estimator = xgboost.XGBRanker(booster=booster, tree_method=tree_method)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_params_equal(estimator.get_params(), loaded.get_params())
+
+ X, y, group = rank_data
+ estimator.fit(X, y, group=group)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_method_outputs_equal(estimator, loaded, X)
+
+
+class TestCatboost:
+ """Tests for CatBoostClassifier, CatBoostRegressor, and CatBoostRanker"""
+
+ # CatBoost data is a little different so that it works as categorical data
+ @pytest.fixture(scope="module")
+ def cb_clf_data(self, clf_data):
+ X, y = clf_data
+ X = (X - X.min()).astype(int)
+ return X, y
+
+ @pytest.fixture(scope="module")
+ def cb_regr_data(self, regr_data):
+ X, y = regr_data
+ X = (X - X.min()).astype(int)
+ return X, y
+
+ @pytest.fixture(scope="module")
+ def cb_rank_data(self, rank_data):
+ X, y, group = rank_data
+ X = (X - X.min()).astype(int)
+ group_id = sum([[i] * n for i, n in enumerate(group)], [])
+ return X, y, group_id
+
+ @pytest.fixture(autouse=True)
+ def catboost(self):
+ catboost = pytest.importorskip("catboost")
+ return catboost
+
+ @pytest.fixture
+ def trusted(self):
+ # TODO: adjust once more types are trusted by default
+ return [
+ "builtins.bytes",
+ "numpy.float32",
+ "numpy.float64",
+ "catboost.core.CatBoostClassifier",
+ "catboost.core.CatBoostRegressor",
+ "catboost.core.CatBoostRanker",
+ ]
+
+ boosting_types = ["Ordered", "Plain"]
+
+ @pytest.mark.parametrize("boosting_type", boosting_types)
+ def test_classifier(self, catboost, cb_clf_data, trusted, boosting_type):
+ estimator = catboost.CatBoostClassifier(boosting_type=boosting_type)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_params_equal(estimator.get_params(), loaded.get_params())
+
+ X, y = cb_clf_data
+ estimator.fit(X, y, cat_features=[0, 1])
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_method_outputs_equal(estimator, loaded, X)
+
+ @pytest.mark.parametrize("boosting_type", boosting_types)
+ def test_regressor(self, catboost, cb_regr_data, trusted, boosting_type):
+ estimator = catboost.CatBoostRegressor(boosting_type=boosting_type)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_params_equal(estimator.get_params(), loaded.get_params())
+
+ X, y = cb_regr_data
+ estimator.fit(X, y, cat_features=[0, 1])
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_method_outputs_equal(estimator, loaded, X)
+
+ @pytest.mark.parametrize("boosting_type", boosting_types)
+ def test_ranker(self, catboost, cb_rank_data, trusted, boosting_type):
+ estimator = catboost.CatBoostRanker(boosting_type=boosting_type)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_params_equal(estimator.get_params(), loaded.get_params())
+
+ X, y, group_id = cb_rank_data
+ estimator.fit(X, y, cat_features=[0, 1], group_id=group_id)
+ loaded = loads(dumps(estimator), trusted=trusted)
+ assert_method_outputs_equal(estimator, loaded, X)
diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py
index 012ab7f7..bd9c1e20 100644
--- a/skops/io/tests/test_persist.py
+++ b/skops/io/tests/test_persist.py
@@ -2,7 +2,6 @@
import inspect
import io
import json
-import sys
import warnings
from collections import Counter
from functools import partial, wraps
@@ -41,11 +40,7 @@
)
from sklearn.utils import all_estimators, check_random_state
from sklearn.utils._tags import _safe_tags
-from sklearn.utils._testing import (
- SkipTest,
- assert_allclose_dense_sparse,
- set_random_state,
-)
+from sklearn.utils._testing import SkipTest, set_random_state
from sklearn.utils.estimator_checks import (
_construct_instance,
_enforce_estimator_tags_y,
@@ -59,15 +54,12 @@
from skops.io._trusted_types import SKLEARN_ESTIMATOR_TYPE_NAMES
from skops.io._utils import LoadContext, SaveContext, _get_state, get_state
from skops.io.exceptions import UnsupportedTypeException
+from skops.io.tests._utils import assert_method_outputs_equal, assert_params_equal
# Default settings for X
N_SAMPLES = 50
N_FEATURES = 20
-# TODO: Investigate why that seems to be an issue on MacOS (only observed with
-# Python 3.8)
-ATOL = 1e-6 if sys.platform == "darwin" else 1e-7
-
@pytest.fixture(autouse=True, scope="module")
def debug_dispatch_functions():
@@ -258,146 +250,6 @@ def _unsupported_estimators(type_filter=None):
yield estimator
-def _is_steps_like(obj):
- # helper function to check if an object is something like Pipeline.steps,
- # i.e. a list of tuples of names and estimators
- if not isinstance(obj, list): # must be a list
- return False
-
- if not obj: # must not be empty
- return False
-
- if not isinstance(obj[0], tuple): # must be list of tuples
- return False
-
- lens = set(map(len, obj))
- if not lens == {2}: # all elements must be length 2 tuples
- return False
-
- keys, vals = list(zip(*obj))
-
- if len(keys) != len(set(keys)): # keys must be unique
- return False
-
- if not all(map(lambda x: isinstance(x, (type(None), BaseEstimator)), vals)):
- # values must be BaseEstimators or None
- return False
-
- return True
-
-
-def _assert_generic_objects_equal(val1, val2):
- def _is_builtin(val):
- # Check if value is a builtin type
- return getattr(getattr(val, "__class__", {}), "__module__", None) == "builtins"
-
- if isinstance(val1, (list, tuple, np.ndarray)):
- assert len(val1) == len(val2)
- for subval1, subval2 in zip(val1, val2):
- _assert_generic_objects_equal(subval1, subval2)
- return
-
- assert type(val1) == type(val2)
- if hasattr(val1, "__dict__"):
- assert_params_equal(val1.__dict__, val2.__dict__)
- elif _is_builtin(val1):
- assert val1 == val2
- else:
- # not a normal Python class, could be e.g. a Cython class
- assert val1.__reduce__() == val2.__reduce__()
-
-
-def _assert_tuples_equal(val1, val2):
- assert len(val1) == len(val2)
- for subval1, subval2 in zip(val1, val2):
- _assert_vals_equal(subval1, subval2)
-
-
-def _assert_vals_equal(val1, val2):
- if hasattr(val1, "__getstate__"):
- # This includes BaseEstimator since they implement __getstate__ and
- # that returns the parameters as well.
- #
- # Some objects return a tuple of parameters, others a dict.
- state1 = val1.__getstate__()
- state2 = val2.__getstate__()
- assert type(state1) == type(state2)
- if isinstance(state1, tuple):
- _assert_tuples_equal(state1, state2)
- else:
- assert_params_equal(val1.__getstate__(), val2.__getstate__())
- elif sparse.issparse(val1):
- assert sparse.issparse(val2) and ((val1 - val2).nnz == 0)
- elif isinstance(val1, (np.ndarray, np.generic)):
- if len(val1.dtype) == 0:
- # for arrays with at least 2 dimensions, check that contiguity is
- # preserved
- if val1.squeeze().ndim > 1:
- assert val1.flags["C_CONTIGUOUS"] is val2.flags["C_CONTIGUOUS"]
- assert val1.flags["F_CONTIGUOUS"] is val2.flags["F_CONTIGUOUS"]
- if val1.dtype == object:
- assert val2.dtype == object
- assert val1.shape == val2.shape
- for subval1, subval2 in zip(val1, val2):
- _assert_generic_objects_equal(subval1, subval2)
- else:
- # simple comparison of arrays with simple dtypes, almost all
- # arrays are of this sort.
- np.testing.assert_array_equal(val1, val2)
- elif len(val1.shape) == 1:
- # comparing arrays with structured dtypes, but they have to be 1D
- # arrays. This is what we get from the Tree's state.
- assert np.all([x == y for x, y in zip(val1, val2)])
- else:
- # we don't know what to do with these values, for now.
- assert False
- elif isinstance(val1, (tuple, list)):
- assert len(val1) == len(val2)
- for subval1, subval2 in zip(val1, val2):
- _assert_vals_equal(subval1, subval2)
- elif isinstance(val1, float) and np.isnan(val1):
- assert np.isnan(val2)
- elif isinstance(val1, dict):
- # dictionaries are compared by comparing their values recursively.
- assert set(val1.keys()) == set(val2.keys())
- for key in val1:
- _assert_vals_equal(val1[key], val2[key])
- elif hasattr(val1, "__dict__") and hasattr(val2, "__dict__"):
- _assert_vals_equal(val1.__dict__, val2.__dict__)
- elif isinstance(val1, np.ufunc):
- assert val1 == val2
- elif val1.__class__.__module__ == "builtins":
- assert val1 == val2
- else:
- _assert_generic_objects_equal(val1, val2)
-
-
-def assert_params_equal(params1, params2):
- # helper function to compare estimator dictionaries of parameters
- assert len(params1) == len(params2)
- assert set(params1.keys()) == set(params2.keys())
- for key in params1:
- with warnings.catch_warnings():
- # this is to silence the deprecation warning from _DictWithDeprecatedKeys
- warnings.filterwarnings("ignore", category=FutureWarning, module="sklearn")
- val1, val2 = params1[key], params2[key]
- assert type(val1) == type(val2)
-
- if _is_steps_like(val1):
- # Deal with Pipeline.steps, FeatureUnion.transformer_list, etc.
- assert _is_steps_like(val2)
- val1, val2 = dict(val1), dict(val2)
-
- if isinstance(val1, (tuple, list)):
- assert len(val1) == len(val2)
- for subval1, subval2 in zip(val1, val2):
- _assert_vals_equal(subval1, subval2)
- elif isinstance(val1, dict):
- assert_params_equal(val1, val2)
- else:
- _assert_vals_equal(val1, val2)
-
-
@pytest.mark.parametrize(
"estimator", _tested_estimators(), ids=_get_check_estimator_ids
)
@@ -493,22 +345,7 @@ def test_can_persist_fitted(estimator):
assert_params_equal(estimator.__dict__, loaded.__dict__)
assert not any(type_ in SKLEARN_ESTIMATOR_TYPE_NAMES for type_ in untrusted_types)
-
- for method in [
- "predict",
- "predict_proba",
- "decision_function",
- "transform",
- "predict_log_proba",
- ]:
- err_msg = (
- f"{estimator.__class__.__name__}.{method}() doesn't produce the same"
- " results after loading the persisted model."
- )
- if hasattr(estimator, method):
- X_pred1 = getattr(estimator, method)(X)
- X_pred2 = getattr(loaded, method)(X)
- assert_allclose_dense_sparse(X_pred1, X_pred2, err_msg=err_msg, atol=ATOL)
+ assert_method_outputs_equal(estimator, loaded, X)
@pytest.mark.parametrize(
@@ -1002,3 +839,29 @@ def test_when_given_object_referenced_twice_loads_as_one_object(obj):
persisted_object = loads(dumps(an_object), trusted=True)
assert persisted_object["obj_1"] is persisted_object["obj_2"]
+
+
+class EstimatorWithBytes(BaseEstimator):
+ def fit(self, X, y, **fit_params):
+ self.bytes_ = b"hello"
+ self.bytearray_ = bytearray([0, 1, 2, 253, 254, 255])
+ return self
+
+
+def test_estimator_with_bytes():
+ est = EstimatorWithBytes().fit(None, None)
+ loaded = loads(dumps(est), trusted=True)
+ assert_params_equal(est.__dict__, loaded.__dict__)
+
+
+def test_estimator_with_bytes_files_created(tmp_path):
+ est = EstimatorWithBytes().fit(None, None)
+ f_name = tmp_path / "estimator.skops"
+ dump(est, f_name)
+ file = Path(f_name)
+ assert file.exists()
+
+ with ZipFile(f_name, "r") as input_zip:
+ files = input_zip.namelist()
+ bin_files = [file for file in files if file.endswith(".bin")]
+ assert len(bin_files) == 2