diff --git a/CHANGES.rst b/CHANGES.rst
index 5d547cfd6..aea8a22cc 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -15,6 +15,10 @@ development and backward compatibility is not ensured.
 Major changes
 -------------
 
+* :func:`to_datetime` is now available to support pandas.to_datetime
+  over dataframes and 2d arrays.
+  :pr:`784` by :user:`Vincent Maladiere `
+
 * Some parameters of :class:`Joiner` have changed. The goal is to harmonize
   parameters across all estimators that perform join(-like) operations, as
   discussed in `#751 `_.
@@ -57,6 +61,11 @@ Major changes
 Minor changes
 -------------
 
+* :class:`DatetimeEncoder` doesn't remove constant features anymore.
+  It also supports an 'errors' argument to raise or coerce errors during
+  transform, and an 'add_total_seconds' argument to include the number of
+  seconds since Epoch.
+  :pr:`784` by :user:`Vincent Maladiere `
+
 * ``inverse_transform`` in :class:`SimilarityEncoder` now works as expected;
   it used to raise an exception.
   :pr:`801` by :user:`Jérôme Dockès `.
diff --git a/doc/api.rst b/doc/api.rst
index 99acf2807..730fe37cb 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -79,7 +79,7 @@ This page lists all available functions and classes of `skrub`.
 
 .. raw:: html

-    Other encoders
+    Dealing with dates

 .. autosummary::
    :toctree: generated/
 
@@ -89,6 +89,14 @@ This page lists all available functions and classes of `skrub`.
 
    DatetimeEncoder
 
+.. autosummary::
+   :toctree: generated/
+   :template: function.rst
+   :nosignatures:
+   :caption: Converting datetime columns in a table
+
+   to_datetime
+
 .. raw:: html

Deduplication: merging variants of the same entry
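As a quick illustration of the `to_datetime` entry added to the API reference above (a sketch assuming this branch is installed; the dataframe and its values are invented):

```python
import pandas as pd

from skrub import to_datetime

# Hypothetical input: one numeric column, one column of date strings.
X = pd.DataFrame({"temp": [10.5, 12.1], "date": ["2024-05-01", "2024-05-02"]})

# Only object/string columns are parsed; numeric columns pass through untouched.
print(to_datetime(X).dtypes)
# temp           float64
# date    datetime64[ns]
```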

diff --git a/doc/conf.py b/doc/conf.py
index 710f4d69a..b1bccad12 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -504,6 +504,7 @@ def notebook_modification_function(notebook_content, notebook_filename):
     "SimilarityEncoder": "skrub.SimilarityEncoder",
     "DatetimeEncoder": "skrub.DatetimeEncoder",
     "deduplicate": "skrub.deduplicate",
+    "to_datetime": "skrub.to_datetime",
     "TableVectorizer": "skrub.TableVectorizer",
     "DatasetInfoOnly": "skrub.datasets._fetching.DatasetInfoOnly",
     "DatasetAll": "skrub.datasets._fetching.DatasetAll",
diff --git a/examples/03_datetime_encoder.py b/examples/03_datetime_encoder.py
index 358187a7b..d89dff662 100644
--- a/examples/03_datetime_encoder.py
+++ b/examples/03_datetime_encoder.py
@@ -34,6 +34,9 @@
 
 .. |HGBR| replace::
     :class:`~sklearn.ensemble.HistGradientBoostingRegressor`
+
+.. |to_datetime| replace::
+    :func:`~skrub.to_datetime`
 """
 
 
@@ -46,19 +49,26 @@
 # on the location, date and time of measurement.
 
 from pprint import pprint
-
 import pandas as pd
 
 data = pd.read_csv(
     "https://raw.githubusercontent.com/pandas-dev/pandas"
     "/main/doc/data/air_quality_no2_long.csv"
-)
+).sort_values("date.utc")
 # Extract our input data (X) and the target column (y)
 y = data["value"]
 X = data[["city", "date.utc"]]
 
 X
 
+###############################################################################
+# We convert the date columns of the dataframe using |to_datetime|. Notice that
+# we don't need to specify which columns to convert.
+from skrub import to_datetime
+
+X = to_datetime(X)
+X.dtypes
+
 ###############################################################################
 # Encoding the features
 # .....................
@@ -73,14 +83,12 @@
 # lower units, as they are probably unimportant.
 
 from sklearn.preprocessing import OneHotEncoder
-
-from skrub import DatetimeEncoder
-
 from sklearn.compose import make_column_transformer
+from skrub import DatetimeEncoder
 
 encoder = make_column_transformer(
     (OneHotEncoder(handle_unknown="ignore"), ["city"]),
-    (DatetimeEncoder(add_day_of_the_week=True, extract_until="minute"), ["date.utc"]),
+    (DatetimeEncoder(add_day_of_the_week=True, resolution="minute"), ["date.utc"]),
     remainder="drop",
 )
 
@@ -88,12 +96,9 @@
 pprint(encoder.get_feature_names_out())
 
 ###############################################################################
-# We see that the encoder is working as expected: the "date.utc" column has
-# been replaced by features extracting the month, day, hour, and day of the
-# week information.
-#
-# Note the year and minute features are not present, this is because they
-# have been removed by the encoder as they are constant the whole period.
+# We see that the encoder is working as expected: the ``"date.utc"`` column has
+# been replaced by features extracting the month, day, hour, minute, day of the
+# week and total seconds since Epoch information.
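Since the example above now exercises the renamed parameter, here is the `extract_until` to `resolution` change in isolation (a minimal sketch with an invented toy column, assuming this branch is installed):

```python
import pandas as pd

from skrub import DatetimeEncoder

X_toy = pd.DataFrame({"when": ["2021-01-01 10:30:00", "2022-06-15 14:00:00"]})

# resolution="day" keeps only the "year", "month" and "day" features;
# add_total_seconds=False drops the seconds-since-Epoch column.
enc = DatetimeEncoder(resolution="day", add_total_seconds=False).fit(X_toy)
print(enc.get_feature_names_out())
# expected: ['when_year', 'when_month', 'when_day']
```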
############################################################################### # One-liner with the |TableVectorizer| @@ -104,8 +109,7 @@ from skrub import TableVectorizer -table_vec = TableVectorizer() -table_vec.fit_transform(X) +table_vec = TableVectorizer().fit(X) pprint(table_vec.get_feature_names_out()) ############################################################################### @@ -116,8 +120,7 @@ table_vec = TableVectorizer( datetime_transformer=DatetimeEncoder(add_day_of_the_week=True), -) -table_vec.fit_transform(X) +).fit(X) pprint(table_vec.get_feature_names_out()) ############################################################################### @@ -144,14 +147,9 @@ # ```py # from sklearn.experimental import enable_hist_gradient_boosting # ``` - -import numpy as np from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.pipeline import make_pipeline -table_vec = TableVectorizer( - datetime_transformer=DatetimeEncoder(add_day_of_the_week=True), -) pipeline = make_pipeline(table_vec, HistGradientBoostingRegressor()) ############################################################################### @@ -164,11 +162,6 @@ # # Instead, we can use the |TimeSeriesSplit|, # which ensures that the test set is always in the future. - -sorted_indices = np.argsort(X["date.utc"]) -X = X.iloc[sorted_indices] -y = y.iloc[sorted_indices] - from sklearn.model_selection import TimeSeriesSplit, cross_val_score cross_val_score( @@ -185,82 +178,71 @@ # # The mean squared error is not obvious to interpret, so we compare # visually the prediction of our model with the actual values. - +import numpy as np import matplotlib.pyplot as plt -from matplotlib.dates import AutoDateFormatter, AutoDateLocator - -X_train = X[X["date.utc"] < "2019-06-01"] -X_test = X[X["date.utc"] >= "2019-06-01"] -y_train = y[X["date.utc"] < "2019-06-01"] -y_test = y[X["date.utc"] >= "2019-06-01"] +mask_train = X["date.utc"] < "2019-06-01" +X_train, X_test = X.loc[mask_train], X.loc[~mask_train] +y_train, y_test = y.loc[mask_train], y.loc[~mask_train] pipeline.fit(X_train, y_train) +y_pred = pipeline.predict(X_test) all_cities = X_test["city"].unique() -fig, axs = plt.subplots(nrows=len(all_cities), ncols=1, figsize=(12, 9)) -fig.subplots_adjust(hspace=0.5) +fig, axes = plt.subplots(nrows=len(all_cities), ncols=1, figsize=(12, 9)) +for ax, city in zip(axes, all_cities): + mask_prediction = X_test["city"] == city + date_prediction = X_test.loc[mask_prediction]["date.utc"] + y_prediction = y_pred[mask_prediction] -for i, city in enumerate(all_cities): - axs[i].plot( - X.loc[X.city == city, "date.utc"], - y.loc[X.city == city], - label="Actual", - ) - axs[i].plot( - X_test.loc[X_test.city == city, "date.utc"], - pipeline.predict(X_test.loc[X_test.city == city]), - label="Predicted", + mask_reference = X["city"] == city + date_reference = X.loc[mask_reference]["date.utc"] + y_reference = y[mask_reference] + + ax.plot(date_reference, y_reference, label="Actual") + ax.plot(date_prediction, y_prediction, label="Predicted") + + ax.set( + ylabel="NO2", + title=city, ) - axs[i].set_title(city) - axs[i].set_ylabel("NO2") - xtick_locator = AutoDateLocator(maxticks=8) - xtick_formatter = AutoDateFormatter(xtick_locator) - axs[i].xaxis.set_major_locator(xtick_locator) - axs[i].xaxis.set_major_formatter(xtick_formatter) - axs[i].legend() + ax.legend() + +fig.subplots_adjust(hspace=0.5) plt.show() ############################################################################### # Let's zoom on a few days: -X_zoomed = 
X[(X["date.utc"] <= "2019-06-04") & (X["date.utc"] >= "2019-06-01")] -y_zoomed = y[(X["date.utc"] <= "2019-06-04") & (X["date.utc"] >= "2019-06-01")] - -X_train_zoomed = X_zoomed[X_zoomed["date.utc"] < "2019-06-03"] -X_test_zoomed = X_zoomed[X_zoomed["date.utc"] >= "2019-06-03"] +mask_zoom_reference = (X["date.utc"] >= "2019-06-01") & (X["date.utc"] < "2019-06-04") +mask_zoom_prediction = (X_test["date.utc"] >= "2019-06-01") & ( + X_test["date.utc"] < "2019-06-04" +) -y_train_zoomed = y[X["date.utc"] < "2019-06-03"] -y_test_zoomed = y[X["date.utc"] >= "2019-06-03"] +all_cities = ["Paris", "London"] +fig, axes = plt.subplots(nrows=len(all_cities), ncols=1, figsize=(12, 9)) +for ax, city in zip(axes, all_cities): + mask_prediction = (X_test["city"] == city) & mask_zoom_prediction + date_prediction = X_test.loc[mask_prediction]["date.utc"] + y_prediction = y_pred[mask_prediction] -zoomed_cities = X_test_zoomed["city"].unique() + mask_reference = (X["city"] == city) & mask_zoom_reference + date_reference = X.loc[mask_reference]["date.utc"] + y_reference = y[mask_reference] -fig, axs = plt.subplots(nrows=len(zoomed_cities), ncols=1, figsize=(12, 9)) -fig.subplots_adjust(hspace=0.5) + ax.plot(date_reference, y_reference, label="Actual") + ax.plot(date_prediction, y_prediction, label="Predicted") -for i, city in enumerate(zoomed_cities): - axs[i].plot( - X_zoomed.loc[X_zoomed["city"] == city, "date.utc"], - y_zoomed.loc[X_zoomed["city"] == city], - label="Actual", - ) - axs[i].plot( - X_test_zoomed.loc[X_test_zoomed["city"] == city, "date.utc"], - pipeline.predict(X_test_zoomed.loc[X_test_zoomed["city"] == city]), - label="Predicted", + ax.set( + ylabel="NO2", + title=city, ) - axs[i].set_title(city) - axs[i].set_ylabel("NO2") - - xtick_locator = AutoDateLocator(maxticks=8) - xtick_formatter = AutoDateFormatter(xtick_locator) - axs[i].xaxis.set_major_locator(xtick_locator) - axs[i].xaxis.set_major_formatter(xtick_formatter) + ax.legend() - axs[i].legend() plt.show() + ############################################################################### # Features importance # ------------------- @@ -280,27 +262,28 @@ # In this case, we don't use a pipeline, because we want to compute the # importance of the features created by the DatetimeEncoder -X_ = table_vec.fit_transform(X) -reg = HistGradientBoostingRegressor().fit(X_, y) -result = permutation_importance(reg, X_, y, n_repeats=10, random_state=0) -std = result.importances_std -importances = result.importances_mean -indices = np.argsort(importances) -# Sort from least to most -indices = list(reversed(indices)) - -plt.figure(figsize=(12, 9)) -plt.title("Feature importances") -n = len(indices) -labels = np.array(table_vec.get_feature_names_out())[indices] -plt.barh(range(n), importances[indices], color="b", yerr=std[indices]) -plt.yticks(range(n), labels, size=15) -plt.tight_layout(pad=1) -plt.show() +X_transform = table_vec.fit_transform(X) +feature_names = table_vec.get_feature_names_out() + +model = HistGradientBoostingRegressor().fit(X_transform, y) +result = permutation_importance(model, X_transform, y, n_repeats=10, random_state=0) + +result = pd.DataFrame( + dict( + feature_names=feature_names, + std=result.importances_std, + importances=result.importances_mean, + ) +).sort_values("importances", ascending=False) + +result.plot.barh( + y="importances", x="feature_names", title="Feature Importances", figsize=(12, 9) +) +plt.tight_layout() ############################################################################### -# We can see that the hour 
of the day is the most important feature,
-# which seems reasonable.
+# We can see that the total seconds since Epoch and the hour of the day
+# are the most important features, which seems reasonable.
 #
 # Conclusion
 # ----------
diff --git a/skrub/__init__.py b/skrub/__init__.py
index 868632080..a55cc134a 100644
--- a/skrub/__init__.py
+++ b/skrub/__init__.py
@@ -5,7 +5,7 @@
 
 from ._agg_joiner import AggJoiner, AggTarget
 from ._check_dependencies import check_dependencies
-from ._datetime_encoder import DatetimeEncoder
+from ._datetime_encoder import DatetimeEncoder, to_datetime
 from ._deduplicate import compute_ngram_distance, deduplicate
 from ._fuzzy_join import fuzzy_join
 from ._gap_encoder import GapEncoder
@@ -34,6 +34,7 @@
     "TargetEncoder",
     "deduplicate",
     "compute_ngram_distance",
+    "to_datetime",
     "AggJoiner",
     "AggTarget",
     "SelectCols",
diff --git a/skrub/_agg_joiner.py b/skrub/_agg_joiner.py
index ed369964b..0e461c3bf 100644
--- a/skrub/_agg_joiner.py
+++ b/skrub/_agg_joiner.py
@@ -155,7 +155,7 @@ class AggJoiner(BaseEstimator, TransformerMixin):
        airportId  airportName  company_mode_1  total_passengers_mean_1
     0          1    Paris CDG              AF                103.33...
     1          2       NY JFK              DL                 80.00...
-    """  # noqa: E501
+    """
 
     def __init__(
         self,
@@ -416,18 +416,19 @@ class AggTarget(BaseEstimator, TransformerMixin):
     ...     "company": ["DL", "AF", "AF", "DL", "DL", "TR"],
     ... })
     >>> y = np.array([1, 1, 0, 0, 1, 1])
-    >>> join_agg = AggTarget(
+    >>> agg_target = AggTarget(
     ...     main_key="company",
     ...     operation=["mean", "max"],
     ... )
-    >>> join_agg.fit_transform(X, y)
+    >>> agg_target.fit_transform(X, y)
        flightId  from_airport  ...  y_0_max_target  y_0_mean_target
-    0         1             1  ...               1          0.66...
-    1         2             1  ...               1          0.50...
-    2         3             1  ...               1          0.50...
-    3         4             2  ...               1          0.66...
-    4         5             2  ...               1          0.66...
-    5         6             2  ...               1          1.00...
+    0         1             1  ...               1         0.666667
+    1         2             1  ...               1         0.500000
+    2         3             1  ...               1         0.500000
+    3         4             2  ...               1         0.666667
+    4         5             2  ...               1         0.666667
+    5         6             2  ...               1         1.000000
+
+    [6 rows x 6 columns]
     """
diff --git a/skrub/_datetime_encoder.py b/skrub/_datetime_encoder.py
index dccb39301..21839c41d 100644
--- a/skrub/_datetime_encoder.py
+++ b/skrub/_datetime_encoder.py
@@ -1,14 +1,18 @@
-from typing import Literal
+import warnings
+from collections import defaultdict
+from typing import Iterable
 
 import numpy as np
 import pandas as pd
-from numpy.typing import ArrayLike, NDArray
+from pandas._libs.tslibs.parsing import guess_datetime_format
 from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.utils import check_array
+from sklearn.utils.fixes import parse_version
 from sklearn.utils.validation import check_is_fitted
 
-from skrub._utils import check_input
+from ._dataframe._namespace import get_df_namespace
 
-WORD_TO_ALIAS: dict[str, str] = {
+WORD_TO_ALIAS = {
     "year": "Y",
     "month": "M",
     "day": "D",
@@ -18,78 +22,490 @@
     "microsecond": "us",
     "nanosecond": "N",
 }
-TIME_LEVELS: list[str] = list(WORD_TO_ALIAS.keys())
-
-AcceptedTimeValues = Literal[
-    "year",
-    "month",
-    "day",
-    "hour",
-    "minute",
-    "second",
-    "microsecond",
-    "nanosecond",
-]
-
-
-class DatetimeEncoder(BaseEstimator, TransformerMixin):
-    """Transform each datetime column into several numeric columns \
-    for temporal features (e.g. "year", "month", "day"...).
-
-    Constant extracted features are dropped; for instance, if the year is
-    always the same in a feature, the extracted "year" column won't be added.
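One note on the version gate defined just below (`_is_pandas_format_mixed_available` and `MIXED_FORMAT`): it exists because `format="mixed"` only appeared in pandas 2.0. A small sketch of what that flag enables, with illustrative values:

```python
import pandas as pd

# On pandas >= 2.0, a single call parses heterogeneous date / datetime
# strings by resolving the format entry by entry.
mixed = ["2022-10-15", "2019-10-15 12:00:00"]
print(pd.to_datetime(mixed, format="mixed"))
# DatetimeIndex(['2022-10-15 00:00:00', '2019-10-15 12:00:00'],
#               dtype='datetime64[ns]', freq=None)
```

On older pandas the module falls back to `format=None` and lets pandas guess per call.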
+TIME_LEVELS = list(WORD_TO_ALIAS)
+
+
+def _is_pandas_format_mixed_available():
+    pandas_version = pd.__version__
+    min_pandas_version = "2.0.0"
+    return parse_version(min_pandas_version) < parse_version(pandas_version)
+
+
+MIXED_FORMAT = "mixed" if _is_pandas_format_mixed_available() else None
+
+
+def to_datetime(
+    X,
+    errors="coerce",
+    **kwargs,
+):
+    """Convert the columns of a dataframe or 2d array into a datetime representation.
+
+    This function augments :func:`pandas.to_datetime` by supporting dataframes
+    and 2d array inputs. It only attempts to convert columns whose dtype is
+    object or string. Numeric columns are skipped and preserved in the output.
+
+    Use the 'format' keyword to force a specific datetime format. See more details in
+    the parameters section.
+
+    Parameters
+    ----------
+    X : Pandas or Polars dataframe, 2d-array or any input accepted \
+    by ``pd.to_datetime``
+        The object to convert to a datetime.
+
+    errors : {'coerce', 'raise'}, default 'coerce'
+        When set to 'raise', errors will be raised only when all of the following
+        conditions are satisfied, for each column ``X_col``:
+        - After converting to numpy, the column dtype is np.object_ or np.str_
+        - Each entry of the column is datetime-parsable, i.e.
+          ``pd.to_datetime(X_col, format="mixed")`` doesn't raise an error.
+          This step is conservative: e.g.
+          ``["2020-01-01", "hello", "2020-01-01"]``
+          is not considered datetime-parsable, so we won't attempt to convert it.
+        - The column as a whole is not datetime-parsable, due to a clash of datetime
+          formats, e.g. '2020/01/01' and '2020-01-01'.
+
+        When set to ``'coerce'``, the entries of ``X_col`` that should have raised
+        an error are set to ``NaT`` instead.
+        You can choose which format to use with the keyword argument ``format``, as with
+        ``pd.to_datetime``, e.g. ``to_datetime(X_col, format='%Y/%m/%d')``.
+        Combined with ``errors='coerce'``, this will convert all entries that don't
+        match this format to ``NaT``.
+
+        Note that the ``'ignore'`` option is not supported and will raise an error.
+
+    **kwargs : key, value mappings
+        Other keyword arguments are passed down to :func:`pandas.to_datetime`.
+
+        One notable argument is 'format'. Setting a format overwrites
+        the datetime format guessing behavior of this function for all columns.
+
+        Note that we don't encourage you to use the dayfirst or monthfirst arguments,
+        since their behavior is ambiguous and they might not be applied at all.
+
+        Moreover, this function raises an error if 'unit' is set to any value.
+        This is because, in ``pandas.to_datetime``, 'unit' is specific to timestamps,
+        whereas in ``skrub.to_datetime`` we don't attempt to parse numeric columns.
+
+    Returns
+    -------
+    datetime
+        Return type depends on input.
+        - dataframes, series and 2d arrays return the same type
+        - otherwise, the same output as :func:`pandas.to_datetime` is returned.
+
+    See Also
+    --------
+    :func:`pandas.to_datetime`
+        Convert argument to datetime.
+
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> X = pd.DataFrame(dict(a=[1, 2], b=["2021-01-01", "2021-02-02"]))
+    >>> X
+       a           b
+    0  1  2021-01-01
+    1  2  2021-02-02
+    >>> to_datetime(X)
+       a          b
+    0  1 2021-01-01
+    1  2 2021-02-02
+    """
+    errors_options = ["coerce", "raise"]
+    if errors not in errors_options:
+        raise ValueError(f"errors options are {errors_options!r}, got {errors!r}.")
+    kwargs["errors"] = errors
+
+    if "unit" in kwargs:
+        raise ValueError(
+            "'unit' is not a parameter of skrub.to_datetime; it is only meaningful "
+            "when applying pandas.to_datetime to a numerical column"
+        )
+
+    # dataframe
+    if hasattr(X, "__dataframe__"):
+        return _to_datetime_dataframe(X, **kwargs)
+
+    # series, this attribute is available since Pandas 2.1.0
+    elif hasattr(X, "__column_consortium_standard__"):
+        return _to_datetime_series(X, **kwargs)
+
+    # 2d array
+    elif isinstance(X, Iterable) and np.asarray(X).ndim == 2:
+        X = _to_datetime_2d_array(np.asarray(X), **kwargs)
+        return np.vstack(X).T
+
+    # 1d array
+    elif isinstance(X, Iterable) and np.asarray(X).ndim == 1:
+        return _to_datetime_1d_array(np.asarray(X), **kwargs)
+
+    # scalar or unknown type
+    elif np.asarray(X).ndim == 0:
+        return _to_datetime_scalar(X, **kwargs)
+
+    else:
+        raise TypeError(
+            "X must be a dataframe, series, 2d array or any "
+            f"valid input for ``pd.to_datetime``. Got {X=!r}."
+        )
+
+
+def _to_datetime_dataframe(X, **kwargs):
+    """Dataframe specialization of ``_to_datetime_2d``.
+
+    Parameters
+    ----------
+    X : Pandas or Polars dataframe
+
+    Returns
+    -------
+    X : Pandas or Polars dataframe
+    """
+    skrub_px, _ = get_df_namespace(X)
+    index = getattr(X, "index", None)
+    X_split = [X[col].to_numpy() for col in X.columns]
+    X_split = _to_datetime_2d(X_split, **kwargs)
+    X_split = {col: X_split[col_idx] for col_idx, col in enumerate(X.columns)}
+    return skrub_px.make_dataframe(X_split, index=index)
+
+
+def _to_datetime_series(X, **kwargs):
+    """Series specialization of :func:`pandas.to_datetime`.
+
+    Parameters
+    ----------
+    X : Pandas or Polars series
+
+    Returns
+    -------
+    X : Pandas or Polars series
+    """
+    skrub_px, _ = get_df_namespace(X.to_frame())
+    index = getattr(X, "index", None)
+    name = X.name
+    X_split = [X.to_numpy()]
+    X_split = _to_datetime_2d(X_split, **kwargs)
+    return skrub_px.make_series(X_split[0], index=index, name=name)
+
+
+def _to_datetime_2d_array(X, **kwargs):
+    """2d array specialization of ``_to_datetime_2d``.
+
+    Parameters
+    ----------
+    X : ndarray of shape ``(n_samples, n_features)``
+
+    Returns
+    -------
+    X_split : list of 1d arrays, of length ``n_features``
+    """
+    X_split = list(X.T)
+    return _to_datetime_2d(X_split, **kwargs)
+
+
+def _to_datetime_1d_array(X, **kwargs):
+    X_split = [X]
+    X_split = _to_datetime_2d(X_split, **kwargs)
+    return np.asarray(X_split[0])
+
+
+def _to_datetime_scalar(X, **kwargs):
+    X_split = [np.atleast_1d(X)]
+    X_split = _to_datetime_2d(X_split, **kwargs)
+    return X_split[0][0]
+
+
+def _to_datetime_2d(
+    X_split,
+    indices=None,
+    index_to_format=None,
+    format=None,
+    **kwargs,
+):
+    """Convert datetime-parsable columns from a 2d array or dataframe \
+    to datetime format.
+
+    The conversion is done in place.
+
+    Parameters
+    ----------
+    X_split : list of 1d arrays of length n_features
+        The 2d input, chunked into a list of arrays. This format allows us
+        to treat each column individually and preserve their dtype, because
+        dataframe.to_numpy() casts all columns to object when at least one
+        column dtype is object.
+
+    indices : list of int, default=None
+        Indices of the parsable columns to convert.
+        If None, indices are computed using the current input X.
+
+    index_to_format : mapping of int to str, default=None
+        Dictionary mapping column indices to their datetime format.
+        It defines the format parameter for each column when calling
+        pd.to_datetime.
+
+        If indices is None, ``index_to_format`` is computed using the
+        current input X.
+        If format is not None, all values of ``index_to_format`` are set
+        to format.
+
+    format : str, default=None
+        When format is not None, it overwrites the values in ``index_to_format``.
+
+    Returns
+    -------
+    X_split : list of 1d arrays of length n_features
+    """
+    if indices is None:
+        indices, index_to_format = _get_datetime_column_indices(X_split)
+
+    # 'format' overwrites the inferred formats in index_to_format
+    if format is not None:
+        index_to_format = {col_idx: format for col_idx in indices}
+
+    for col_idx in indices:
+        X_split[col_idx] = pd.to_datetime(
+            X_split[col_idx], format=index_to_format[col_idx], **kwargs
+        )
+
+    return X_split
+
+
+def _get_datetime_column_indices(X_split):
+    """Select the datetime-parsable columns by their indices \
+    and return their datetime format.
+
+    Parameters
+    ----------
+    X_split : list of 1d arrays of length n_features
+
+    Returns
+    -------
+    datetime_indices : list of int
+        List of parsable columns, identified by their indices.
+
+    index_to_format : mapping of int to str
+        Dictionary mapping parsable column indices to their datetime format.
+    """
+    indices = []
+    index_to_format = {}
+
+    for col_idx, X_col in enumerate(X_split):
+        X_col = X_col[pd.notnull(X_col)]
+
+        # convert pd.Timestamp to np.datetime64
+        if all(isinstance(val, pd.Timestamp) for val in X_col):
+            X_col = X_col.astype("datetime64")
+
+        if _is_column_datetime_parsable(X_col):
+            indices.append(col_idx)
+
+            if np.issubdtype(X_col.dtype, np.datetime64):
+                # We don't need to specify a parsing format
+                # for columns that are already of type datetime64.
+                datetime_format = None
+            else:
+                datetime_format = _guess_datetime_format(X_col)
+
+            index_to_format[col_idx] = datetime_format
+
+    return indices, index_to_format
+
+
+def _is_column_datetime_parsable(X_col):
+    """Check whether a 1d array can be converted into a \
+    :class:`pandas.DatetimeIndex`.
+
+    Parameters
+    ----------
+    X_col : array-like of shape ``(n_samples,)``
+
+    Returns
+    -------
+    is_dt_parsable : bool
+    """
+    # Reject columns of int, float or bool cast as object.
+    # Pandas < 2.0.0 raises a deprecation warning instead of an error.
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", category=DeprecationWarning)
+        try:
+            if np.array_equal(X_col, X_col.astype(np.float64)):
+                return False
+        except ValueError:
+            pass
+
+    np_dtypes_candidates = [np.object_, np.str_, np.datetime64]
+    is_type_datetime_compatible = any(
+        np.issubdtype(X_col.dtype, np_dtype) for np_dtype in np_dtypes_candidates
+    )
+    if is_type_datetime_compatible:
+        try:
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", category=UserWarning)
+                # format="mixed" parses entries individually,
+                # avoiding a ValueError when both date and datetime formats
+                # are present.
+                # At this stage, the format itself doesn't matter.
+                _ = pd.to_datetime(X_col, format=MIXED_FORMAT)
+            return True
+        except (pd.errors.ParserError, ValueError):
+            pass
+    return False
+
+
+def _guess_datetime_format(X_col):
+    """Infer the format of a 1d array.
+
+    This function runs pandas' ``guess_datetime_format`` routine with both
+    dayfirst and monthfirst settings, and selects the corresponding format
+    when one of the two yields a single, consistent format across the array.
+
+    When both the dayfirst and monthfirst formats are possible, we select
+    monthfirst by default.
+
+    You can override this behavior by passing a format to the caller function;
+    an explicit format always takes precedence over inferring one with
+    ``_guess_datetime_format``.
+
+    Parameters
+    ----------
+    X_col : ndarray of shape ``(n_samples,)``
+        X_col must only contain string objects, without any missing values.
+
+    Returns
+    -------
+    datetime_format : str or None
+    """
+    X_col = X_col.astype(np.object_)
+    vfunc = np.vectorize(guess_datetime_format)
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", category=UserWarning)
+        # pd.unique handles None
+        month_first_formats = pd.unique(vfunc(X_col, dayfirst=False))
+        day_first_formats = pd.unique(vfunc(X_col, dayfirst=True))
+
+    if len(month_first_formats) == 1 and month_first_formats[0] is not None:
+        return str(month_first_formats[0])
+
+    elif len(day_first_formats) == 1 and day_first_formats[0] is not None:
+        return str(day_first_formats[0])
+
+    # special heuristic: when both date and datetime formats are
+    # present, allow the format to be mixed.
+    elif (
+        len(month_first_formats) == 2
+        and len(day_first_formats) == 2
+        and len(month_first_formats[0]) != len(month_first_formats[1])
+    ):
+        return MIXED_FORMAT
+
+    else:
+        return None
+
+
+def _is_column_date_only(X_col):
+    """Check whether a :obj:`pandas.DatetimeIndex` only contains dates.
+
+    Parameters
+    ----------
+    X_col : pandas.DatetimeIndex of shape ``(n_samples,)``
+
+    Returns
+    -------
+    is_date : bool
+    """
+    return np.array_equal(X_col, X_col.normalize())
+
+
+def _datetime_to_total_seconds(X_col):
+    """
+    Parameters
+    ----------
+    X_col : DatetimeIndex of shape ``(n_samples,)``
+
+    Returns
+    -------
+    X_col : ndarray of shape ``(n_samples,)``
+    """
+    if X_col.tz is not None:
+        X_col = X_col.tz_convert("utc")
+
+    # Total seconds since Epoch; NaT != NaT, so this comparison
+    # masks out the missing entries.
+    mask_notnull = X_col == X_col
+
+    return np.where(
+        mask_notnull,
+        X_col.astype("int64") / 1e9,
+        np.nan,
+    )
+
+
+class DatetimeEncoder(TransformerMixin, BaseEstimator):
+    """Transform each datetime column into several numeric columns \
+    for temporal features (e.g. year, month, day...).
 
     If the dates are timezone aware, all the features extracted will correspond
     to the provided timezone.
 
     Parameters
     ----------
-    extract_until : {"year", "month", "day", "hour", "minute", "second",
+    resolution : {"year", "month", "day", "hour", "minute", "second",
         "microsecond", "nanosecond", None}, default="hour"
-        Extract up to this granularity.
-        If all non-constant features have not been extracted,
-        add the "total_time" feature, which contains the time to epoch (in seconds).
-        For instance, if you specify "day", only "year", "month", "day" and
-        "total_time" features will be created.
-        If None, only the "total_time" feature will be created.
+        Extract up to this resolution.
+        E.g., ``resolution="day"`` generates the features "year", "month",
+        "day" only.
+        If ``None``, no such feature will be created (but day of the week and \
+        total seconds may still be extracted, see below).
+
     add_day_of_the_week : bool, default=False
-        Add day of the week feature (if day is extracted).
-        This is a numerical feature from 0 (Monday) to 6 (Sunday).
+        Add day of the week feature as a numerical feature
+        from 0 (Monday) to 6 (Sunday).
+
+    add_total_seconds : bool, default=True
+        Add the total number of seconds since Epoch.
+
+    errors : {'coerce', 'raise'}, default="coerce"
+        During transform:
+        - If ``"coerce"``, then invalid parsing will be set as ``pd.NaT``.
+        - If ``"raise"``, then invalid parsing will raise an exception.
 
     Attributes
     ----------
-    n_features_in_ : int
-        Number of features in the data seen during fit.
+    column_indices_ : list of int
+        Indices of the datetime-parsable columns.
+
+    index_to_format_ : dict[int, str]
+        Mapping from column indices to their datetime formats.
+
+    index_to_features_ : dict[int, list[str]]
+        Dictionary mapping the column indices to the list of datetime
+        features extracted for each column.
+
     n_features_out_ : int
         Number of features of the transformed data.
-    features_per_column_ : mapping of int to list of str
-        Dictionary mapping the index of the original columns
-        to the list of features extracted for each column.
-    col_names_ : None or list of str
-        List of the names of the features of the input data,
-        if input data was a pandas DataFrame, otherwise None.
 
     See Also
     --------
     GapEncoder :
         Encode dirty categories (strings) by constructing latent topics with
         continuous encoding.
+    MinHashEncoder :
+        Encode string columns as a numeric array with the minhash method.
+    SimilarityEncoder :
+        Encode string columns as a numeric array with n-gram string similarity.
 
     Examples
     --------
-    >>> enc = DatetimeEncoder()
-
-    Let's encode the following dates:
-
+    >>> enc = DatetimeEncoder(add_total_seconds=False)
     >>> X = [['2022-10-15'], ['2021-12-25'], ['2020-05-18'], ['2019-10-15 12:00:00']]
-
     >>> enc.fit(X)
-    DatetimeEncoder()
+    DatetimeEncoder(add_total_seconds=False)
 
     The encoder will output a transformed array
-    with four columns ("year", "month", "day" and "hour"):
+    with four columns ("year", "month", "day", "hour"):
 
     >>> enc.transform(X)
     array([[2022., 10., 15.,  0.],
@@ -98,80 +514,31 @@ class DatetimeEncoder(BaseEstimator, TransformerMixin):
            [2019., 10., 15., 12.]])
     """
 
-    n_features_in_: int
-    n_features_out_: int
-    features_per_column_: dict[int, list[str]]
-    col_names_: list[str] | None
-
     def __init__(
         self,
         *,
-        extract_until: AcceptedTimeValues | None = "hour",
-        add_day_of_the_week: bool = False,
+        resolution="hour",
+        add_day_of_the_week=False,
+        add_total_seconds=True,
+        errors="coerce",
     ):
-        self.extract_until = extract_until
+        self.resolution = resolution
        self.add_day_of_the_week = add_day_of_the_week
+        self.add_total_seconds = add_total_seconds
+        self.errors = errors
 
-    def _more_tags(self):
-        """
-        Used internally by sklearn to ease the estimator checks.
-        """
-        return {
-            "X_types": ["2darray", "categorical"],
-            "allow_nan": True,
-            "_xfail_checks": {"check_dtype_object": "Specific datetime error."},
-        }
-
-    def _validate_keywords(self):
-        if self.extract_until not in TIME_LEVELS and self.extract_until is not None:
-            raise ValueError(
-                f'"extract_until" should be one of {TIME_LEVELS}, '
-                f"got {self.extract_until}. "
" - ) - - @staticmethod - def _extract_from_date(date_series: pd.Series, feature: str): - if feature == "year": - return pd.DatetimeIndex(date_series).year.to_numpy() - elif feature == "month": - return pd.DatetimeIndex(date_series).month.to_numpy() - elif feature == "day": - return pd.DatetimeIndex(date_series).day.to_numpy() - elif feature == "hour": - return pd.DatetimeIndex(date_series).hour.to_numpy() - elif feature == "minute": - return pd.DatetimeIndex(date_series).minute.to_numpy() - elif feature == "second": - return pd.DatetimeIndex(date_series).second.to_numpy() - elif feature == "microsecond": - return pd.DatetimeIndex(date_series).microsecond.to_numpy() - elif feature == "nanosecond": - return pd.DatetimeIndex(date_series).nanosecond.to_numpy() - elif feature == "dayofweek": - return pd.DatetimeIndex(date_series).dayofweek.to_numpy() - elif feature == "total_time": - tz = pd.DatetimeIndex(date_series).tz - # Compute the time in seconds from the epoch time UTC - if tz is None: - return ( - pd.to_datetime(date_series) - pd.Timestamp("1970-01-01") - ) // pd.Timedelta("1s") - else: - return ( - pd.DatetimeIndex(date_series).tz_convert("utc") - - pd.Timestamp("1970-01-01", tz="utc") - ) // pd.Timedelta("1s") - - def fit(self, X: ArrayLike, y=None) -> "DatetimeEncoder": - """Fit the instance to ``X``. + def fit(self, X, y=None): + """Fit the instance to X. - In practice, just check keywords and input validity, - and stores which extracted features are not constant. + Select datetime-parsable columns and generate the list of + datetime feature to extract. Parameters ---------- - X : array-like, shape (``n_samples``, ``n_features``) - Data where each column is a datetime feature. + X : array-like, shape ``(n_samples, n_features)`` + Input data. Columns that can't be converted into + ``pandas.DatetimeIndex`` and numerical values will + be dropped. y : None Unused, only here for compatibility. @@ -180,95 +547,144 @@ def fit(self, X: ArrayLike, y=None) -> "DatetimeEncoder": DatetimeEncoder Fitted DatetimeEncoder instance (self). 
""" - self._validate_keywords() - if isinstance(X, pd.DataFrame): - self.col_names_ = X.columns.to_list() - else: - self.col_names_ = None - X = check_input(X) - # Features to extract for each column, after removing constant features - self.features_per_column_ = {} - for i in range(X.shape[1]): - self.features_per_column_[i] = [] - # Check which columns are constant - for i in range(X.shape[1]): - if self.extract_until is None: - if np.nanstd(self._extract_from_date(X[:, i], "total_time")) > 0: - self.features_per_column_[i].append("total_time") - else: - for feature in TIME_LEVELS: - if np.nanstd(self._extract_from_date(X[:, i], feature)) > 0: - if TIME_LEVELS.index(feature) <= TIME_LEVELS.index( - self.extract_until - ): - self.features_per_column_[i].append(feature) - # we add a total_time feature, which contains the full - # time to epoch, if there is at least one - # feature that has not been extracted and is not constant - if TIME_LEVELS.index(feature) > TIME_LEVELS.index( - self.extract_until - ): - self.features_per_column_[i].append("total_time") - break - # Add day of the week feature if needed - if ( - self.add_day_of_the_week - and np.nanstd(self._extract_from_date(X[:, i], "dayofweek")) > 0 - ): - self.features_per_column_[i].append("dayofweek") - - self.n_features_in_ = X.shape[1] - self.n_features_out_ = len( - np.concatenate(list(self.features_per_column_.values())) + if self.resolution not in TIME_LEVELS and self.resolution is not None: + raise ValueError( + f"'resolution' options are {TIME_LEVELS}, got {self.resolution!r}." + ) + + errors_options = ["coerce", "raise"] + if self.errors not in errors_options: + raise ValueError( + f"'errors' options are {errors_options!r}, got {self.errors!r}." + ) + + self._check_feature_names(X, reset=True) + self._check_n_features(X, reset=True) + X = check_array( + X, ensure_2d=True, force_all_finite=False, dtype=None, copy=False ) + self._select_datetime_cols(X) + return self - def transform(self, X: ArrayLike, y=None) -> NDArray: + def _select_datetime_cols(self, X): + """Select datetime-parsable columns and generate the list of + datetime feature to extract. + + If the input only contains dates (and no datetimes), only the features + ["year", "month", "day"] will be filtered with resolution. + + Parameters + ---------- + X : array-like of shape ``(n_samples, n_features)`` + """ + if self.resolution is None: + levels = [] + else: + idx_level = TIME_LEVELS.index(self.resolution) + levels = TIME_LEVELS[: idx_level + 1] + + X_split = np.hsplit(X, X.shape[1]) + self.column_indices_, self.index_to_format_ = _get_datetime_column_indices( + X_split + ) + + self.index_to_features_ = defaultdict(list) + self.n_features_out_ = 0 + + for col_idx in self.column_indices_: + X_col = pd.DatetimeIndex(X[:, col_idx]) + if _is_column_date_only(X_col): + # Keep only date attributes + levels = [ + level for level in levels if level in ["year", "month", "day"] + ] + + self.index_to_features_[col_idx] += levels + self.n_features_out_ += len(levels) + + if self.add_total_seconds: + self.index_to_features_[col_idx].append("total_seconds") + self.n_features_out_ += 1 + + if self.add_day_of_the_week: + self.index_to_features_[col_idx].append("day_of_week") + self.n_features_out_ += 1 + + def transform(self, X, y=None): """Transform ``X`` by replacing each datetime column with \ corresponding numerical features. 
         Parameters
         ----------
-        X : array-like, shape (``n_samples``, ``n_features``)
+        X : array-like of shape ``(n_samples, n_features)``
             The data to transform, where each column is a datetime feature.
 
         y : None
             Unused, only here for compatibility.
 
         Returns
         -------
-        ndarray, shape (``n_samples``, ``n_features_out_``)
+        X_out : ndarray of shape ``(n_samples, n_features_out_)``
             Transformed input.
         """
-        check_is_fitted(
-            self,
-            attributes=["n_features_in_", "n_features_out_", "features_per_column_"],
+        check_is_fitted(self)
+        self._check_n_features(X, reset=False)
+        self._check_feature_names(X, reset=False)
+
+        X = check_array(
+            X,
+            ensure_2d=True,
+            force_all_finite=False,
+            dtype=None,
+            copy=False,
         )
-        X = check_input(X)
-        if X.shape[1] != self.n_features_in_:
-            raise ValueError(
-                f"The number of features in the input data ({X.shape[1]}) "
-                "does not match the number of features "
-                f"seen during fit ({self.n_features_in_}). "
-            )
-        # Create a new array with the extracted features,
-        # choosing only features that weren't constant during fit
-        X_ = np.empty((X.shape[0], self.n_features_out_), dtype=np.float64)
-        idx = 0
-        for i in range(X.shape[1]):
-            for j, feature in enumerate(self.features_per_column_[i]):
-                X_[:, idx + j] = self._extract_from_date(X[:, i], feature)
-            idx += len(self.features_per_column_[i])
-        return X_
-
-    def get_feature_names_out(self, input_features=None) -> list[str]:
-        """Return clean feature names.
+        X_split = _to_datetime_2d_array(
+            X,
+            indices=self.column_indices_,
+            index_to_format=self.index_to_format_,
+            errors=self.errors,
+        )
+
+        return self._extract_features(X_split)
+
+    def _extract_features(self, X_split):
+        """Extract datetime features from the selected columns.
+
+        Parameters
+        ----------
+        X_split : list of 1d arrays of length n_features
+
+        Returns
+        -------
+        X_out : ndarray of shape ``(n_samples, n_features_out_)``
+        """
+        # X_out must be of dtype float64, otherwise np.nan values would be
+        # cast to large negative integers.
+        X_out = np.empty((X_split[0].shape[0], self.n_features_out_), dtype=np.float64)
+        offset_idx = 0
+        for col_idx in self.column_indices_:
+            X_col = X_split[col_idx]
+            features = self.index_to_features_[col_idx]
+            for feat_idx, feature in enumerate(features):
+                if feature == "total_seconds":
+                    X_feature = _datetime_to_total_seconds(X_col)
+                else:
+                    X_feature = getattr(X_col, feature).to_numpy()
+                X_out[:, offset_idx + feat_idx] = X_feature
+
+            offset_idx += len(features)
+
+        return X_out
+
+    def get_feature_names_out(self, input_features=None):
+        """Get output feature names for transformation.
 
         Feature names are formatted like: "<column_name>_<new_feature>"
         if the original data has column names, otherwise with format
-        "<column_index>_<new_feature>" where "<new_feature>" is one of
+        "<column_index>_<new_feature>", where "<new_feature>" is one of
         {"year", "month", "day", "hour", "minute", "second",
-        "microsecond", "nanosecond", "dayofweek"}.
+        "microsecond", "nanosecond", "day_of_week"}.
 
         Parameters
         ----------
@@ -277,12 +693,23 @@ def get_feature_names_out(self, input_features=None) -> list[str]:
 
         Returns
         -------
-        list of str
+        feature_names : list of str
             List of feature names.
""" + check_is_fitted(self, "index_to_features_") feature_names = [] - for i in self.features_per_column_.keys(): - prefix = str(i) if self.col_names_ is None else self.col_names_[i] - for feature in self.features_per_column_[i]: - feature_names.append(f"{prefix}_{feature}") + columns = getattr(self, "feature_names_in_", list(range(self.n_features_in_))) + for col_idx, features in self.index_to_features_.items(): + column = columns[col_idx] + feature_names += [f"{column}_{feat}" for feat in features] return feature_names + + def _more_tags(self): + """ + Used internally by sklearn to ease the estimator checks. + """ + return { + "X_types": ["2darray", "categorical"], + "allow_nan": True, + "_xfail_checks": {"check_dtype_object": "Specific datetime error."}, + } diff --git a/skrub/tests/test_datetime_encoder.py b/skrub/tests/test_datetime_encoder.py index fa7e93a93..3881eac67 100644 --- a/skrub/tests/test_datetime_encoder.py +++ b/skrub/tests/test_datetime_encoder.py @@ -1,494 +1,464 @@ +from copy import deepcopy +from itertools import product + import numpy as np import pandas as pd import pytest -from sklearn.exceptions import NotFittedError +from numpy.testing import assert_allclose, assert_array_equal +from pandas.testing import assert_frame_equal + +from skrub._datetime_encoder import ( + TIME_LEVELS, + DatetimeEncoder, + _is_pandas_format_mixed_available, + to_datetime, +) -from skrub._datetime_encoder import DatetimeEncoder +NANOSECONDS_FORMAT = ( + "%Y-%m-%d %H:%M:%S.%f" if _is_pandas_format_mixed_available() else None +) +MSG_MIN_PANDAS_SKIP = "Pandas format=mixed is not available" -def get_date_array() -> np.array: - return np.array( +def get_date(as_array=False): + df = pd.DataFrame( [ - pd.to_datetime(["2020-01-01", "2020-01-02", "2020-01-03"]), - pd.to_datetime(["2021-02-03", "2020-02-04", "2021-02-05"]), - pd.to_datetime(["2022-01-01", "2020-12-25", "2022-01-03"]), - pd.to_datetime(["2023-02-03", "2020-02-04", "2023-02-05"]), - ] + ["2020-01-01", "2020-01-02", "2020-01-03"], + ["2021-02-03", "2020-02-04", "2021-02-05"], + ["2022-01-01", "2020-12-25", "2022-01-03"], + ["2023-02-03", "2020-02-04", "2023-02-05"], + ], ) + if as_array: + return df.to_numpy() + return df -def get_constant_date_array() -> np.array: - return np.array( +def get_datetime(as_array=False): + df = pd.DataFrame( [ - pd.to_datetime(["2020-01-01", "2020-02-04", "2021-02-05"]), - pd.to_datetime(["2020-01-01", "2020-02-04", "2021-02-05"]), - pd.to_datetime(["2020-01-01", "2020-02-04", "2021-02-05"]), - pd.to_datetime(["2020-01-01", "2020-02-04", "2021-02-05"]), - ] + ["2020-01-01 10:12:01", "2020-01-02 10:23:00", "2020-01-03 10:00:00"], + ["2021-02-03 12:45:23", "2020-02-04 22:12:00", "2021-02-05 12:00:00"], + ["2022-01-01 23:23:43", "2020-12-25 11:12:00", "2022-01-03 11:00:00"], + ["2023-02-03 11:12:12", "2020-02-04 08:32:00", "2023-02-05 23:00:00"], + ], ) + if as_array: + return df.to_numpy() + return df -def get_datetime_array() -> np.array: - return np.array( +def get_nanoseconds(as_array=False): + df = pd.DataFrame( [ - pd.to_datetime( - [ - "2020-01-01 10:12:01", - "2020-01-02 10:23:00", - "2020-01-03 10:00:00", - ], - ), - pd.to_datetime( - [ - "2021-02-03 12:45:23", - "2020-02-04 22:12:00", - "2021-02-05 12:00:00", - ], - ), - pd.to_datetime( - [ - "2022-01-01 23:23:43", - "2020-12-25 11:12:00", - "2022-01-03 11:00:00", - ], - ), - pd.to_datetime( - [ - "2023-02-03 11:12:12", - "2020-02-04 08:32:00", - "2023-02-05 23:00:00", - ], - ), - ] + ["2020-08-24 15:55:30.123456789", "2020-08-24 
15:55:30.123456789"], + ["2020-08-20 14:56:31.987654321", "2021-07-20 14:56:31.987654321"], + ["2020-08-20 14:57:32.123987654", "2023-09-20 14:57:32.123987654"], + ["2020-08-20 14:58:33.987123456", "2023-09-20 14:58:33.987123456"], + ], ) + if as_array: + return df.to_numpy() + return df -def get_datetime_array_nanoseconds() -> np.array: - return np.array( +def get_nan_datetime(as_array=False): + df = pd.DataFrame( [ - pd.to_datetime( - [ - # constant year and month - # for the first feature - "2020-08-24 15:55:30.123456789", - "2020-08-24 15:55:30.123456789", - ], - ), - pd.to_datetime( - [ - "2020-08-20 14:56:31.987654321", - "2021-07-20 14:56:31.987654321", - ], - ), - pd.to_datetime( - [ - "2020-08-20 14:57:32.123987654", - "2023-09-20 14:57:32.123987654", - ], - ), - pd.to_datetime( - [ - "2020-08-20 14:58:33.987123456", - "2023-09-20 14:58:33.987123456", - ], - ), - ] + ["2020-01-01 10:12:01", None, "2020-01-03 10:00:00"], + [np.nan, "2020-02-04 22:12:00", "2021-02-05 12:00:00"], + ["2022-01-01 23:23:43", "2020-12-25 11:12:00", pd.NA], + ], ) + if as_array: + return df.to_numpy() + return df -def get_dirty_datetime_array() -> np.array: - return np.array( +def get_tz_datetime(as_array=False): + # The equivalent dtype is "datetime64[ns, Asia/Kolkata]" + df = pd.DataFrame( [ - np.array( - pd.to_datetime( - [ - "2020-01-01 10:12:01", - "2020-01-02 10:23:00", - "2020-01-03 10:00:00", - ] - ) - ), - np.array( - pd.to_datetime([np.nan, "2020-02-04 22:12:00", "2021-02-05 12:00:00"]) - ), - np.array( - pd.to_datetime(["2022-01-01 23:23:43", "2020-12-25 11:12:00", pd.NaT]) - ), - np.array( - pd.to_datetime( - [ - "2023-02-03 11:12:12", - "2020-02-04 08:32:00", - "2023-02-05 23:00:00", - ] - ) - ), - ] + ["2020-01-01 10:12:01+05:30"], + ["2021-02-03 12:45:23+05:30"], + ["2022-01-01 23:23:43+05:30"], + ["2023-02-03 11:12:12+05:30"], + ], + ) + if as_array: + return df.to_numpy() + return df + + +def get_mixed_type_dataframe(): + return pd.DataFrame( + dict( + a=["2020-01-01", "2020-02-04", "2021-02-05"], + b=["yo", "ya", "yu"], + c=[1, 2, 3], + d=["1", "2", "3"], + e=["01/01/2023", "03/01/2023", "14/01/2023"], + f=[True, False, True], + ) ) -def get_datetime_with_TZ_array() -> pd.DataFrame: - res = pd.DataFrame( - [ - pd.to_datetime(["2020-01-01 10:12:01"]), - pd.to_datetime(["2021-02-03 12:45:23"]), - pd.to_datetime(["2022-01-01 23:23:43"]), - pd.to_datetime(["2023-02-03 11:12:12"]), - ] +def get_mixed_datetime_format(as_array=False): + df = pd.DataFrame( + dict( + a=[ + "2022-10-15", + "2021-12-25", + "2020-05-18", + "2019-10-15 12:00:00", + ] + ) ) - for col in res.columns: - res[col] = pd.DatetimeIndex(res[col]).tz_localize("Asia/Kolkata") - return res - - -def test_fit() -> None: - # Dates - X = get_date_array() - enc = DatetimeEncoder() - expected_features_per_column_ = { - 0: ["year", "month", "day"], - 1: ["month", "day"], - 2: ["year", "month", "day"], - } - enc.fit(X) - assert enc.features_per_column_ == expected_features_per_column_ - - X = get_date_array() - enc = DatetimeEncoder(add_day_of_the_week=True) - expected_features_per_column_ = { - 0: ["year", "month", "day", "dayofweek"], - 1: ["month", "day", "dayofweek"], - 2: ["year", "month", "day", "dayofweek"], - } - enc.fit(X) - assert enc.features_per_column_ == expected_features_per_column_ - - # Datetimes - X = get_datetime_array() - enc = DatetimeEncoder(add_day_of_the_week=True) - expected_features_per_column_ = { - 0: ["year", "month", "day", "hour", "total_time", "dayofweek"], - 1: ["month", "day", "hour", "total_time", 
"dayofweek"], - 2: ["year", "month", "day", "hour", "dayofweek"], - } - enc.fit(X) - assert enc.features_per_column_ == expected_features_per_column_ - - # we check that the features are extracted until `extract_until` - # that constant feature are not extracted - # and that the total_time feature is extracted if needed - X = get_datetime_array() - enc = DatetimeEncoder(extract_until="minute") - expected_features_per_column_ = { - 0: ["year", "month", "day", "hour", "minute", "total_time"], - 1: ["month", "day", "hour", "minute"], - 2: ["year", "month", "day", "hour"], - } - enc.fit(X) - assert enc.features_per_column_ == expected_features_per_column_ - - # extract_until="nanosecond" - X = get_datetime_array_nanoseconds() - enc = DatetimeEncoder(extract_until="nanosecond") - expected_features_per_column_ = { - # constant year and month - # for first feature - 0: [ - "day", - "hour", - "minute", - "second", - "microsecond", - "nanosecond", - ], - 1: [ - "year", - "month", - "day", - "hour", - "minute", - "second", - "microsecond", - "nanosecond", - ], - } - enc.fit(X) - assert enc.features_per_column_ == expected_features_per_column_ - - # Dirty Datetimes - X = get_dirty_datetime_array() - enc = DatetimeEncoder() - expected_features_per_column_ = { - 0: ["year", "month", "day", "hour", "total_time"], - 1: ["month", "day", "hour", "total_time"], - 2: ["year", "month", "day", "hour"], - } - enc.fit(X) - assert enc.features_per_column_ == expected_features_per_column_ + if as_array: + return df.to_numpy() + return df - # Datetimes with TZ - X = get_datetime_with_TZ_array() - enc = DatetimeEncoder() - expected_features_per_column_ = {0: ["year", "month", "day", "hour", "total_time"]} - enc.fit(X) - assert enc.features_per_column_ == expected_features_per_column_ - # Feature names - # Without column names - X = get_datetime_array() - enc = DatetimeEncoder(add_day_of_the_week=True) - expected_feature_names = [ - "0_year", - "0_month", - "0_day", - "0_hour", - "0_total_time", - "0_dayofweek", - "1_month", - "1_day", - "1_hour", - "1_total_time", - "1_dayofweek", - "2_year", - "2_month", - "2_day", - "2_hour", - "2_dayofweek", - ] +@pytest.mark.parametrize("as_array", [True, False]) +@pytest.mark.parametrize( + "get_data_func, features, format", + [ + (get_date, TIME_LEVELS[: TIME_LEVELS.index("day") + 1], "%Y-%m-%d"), + (get_datetime, TIME_LEVELS, "%Y-%m-%d %H:%M:%S"), + (get_tz_datetime, TIME_LEVELS, "%Y-%m-%d %H:%M:%S%z"), + (get_nanoseconds, TIME_LEVELS, NANOSECONDS_FORMAT), + ], +) +@pytest.mark.parametrize( + "add_total_seconds, add_day_of_the_week", + list(product([True, False], [True, False])), +) +@pytest.mark.parametrize("resolution", TIME_LEVELS) +def test_fit( + as_array, + get_data_func, + features, + format, + add_total_seconds, + add_day_of_the_week, + resolution, +): + X = get_data_func(as_array=as_array) + enc = DatetimeEncoder( + add_day_of_the_week=add_day_of_the_week, + add_total_seconds=add_total_seconds, + resolution=resolution, + ) enc.fit(X) - assert enc.get_feature_names_out() == expected_feature_names - # With column names - X = get_datetime_array() - X = pd.DataFrame(X) - X.columns = ["col1", "col2", "col3"] - enc = DatetimeEncoder(add_day_of_the_week=True) + total_seconds = ["total_seconds"] if add_total_seconds else [] + day_of_week = ["day_of_week"] if add_day_of_the_week else [] + + if resolution in features: + features_ = features[: features.index(resolution) + 1] + else: + features_ = deepcopy(features) + + features_ += total_seconds + day_of_week + columns = 
range(X.shape[1]) + + expected_index_to_features = {col: features_ for col in columns} + expected_index_to_format = {col: format for col in columns} + expected_n_features_out = len(features_) * X.shape[1] expected_feature_names = [ - "col1_year", - "col1_month", - "col1_day", - "col1_hour", - "col1_total_time", - "col1_dayofweek", - "col2_month", - "col2_day", - "col2_hour", - "col2_total_time", - "col2_dayofweek", - "col3_year", - "col3_month", - "col3_day", - "col3_hour", - "col3_dayofweek", + f"{col}_{feature}" for col in columns for feature in features_ ] - enc.fit(X) + + assert enc.index_to_features_ == expected_index_to_features + assert enc.index_to_format_ == expected_index_to_format + assert enc.n_features_out_ == expected_n_features_out assert enc.get_feature_names_out() == expected_feature_names -def test_transform() -> None: - # Dates - X = get_date_array() - enc = DatetimeEncoder(add_day_of_the_week=True) - expected_result = np.array( - [ - [2020, 1, 1, 2, 1, 2, 3, 2020, 1, 3, 4], - [2021, 2, 3, 2, 2, 4, 1, 2021, 2, 5, 4], - [2022, 1, 1, 5, 12, 25, 4, 2022, 1, 3, 0], - [2023, 2, 3, 4, 2, 4, 1, 2023, 2, 5, 6], - ] +def test_format_nan(): + X = get_nan_datetime() + enc = DatetimeEncoder().fit(X) + expected_index_to_format = { + 0: "%Y-%m-%d %H:%M:%S", + 1: "%Y-%m-%d %H:%M:%S", + 2: "%Y-%m-%d %H:%M:%S", + } + assert enc.index_to_format_ == expected_index_to_format + + +def test_format_nz(): + X = get_tz_datetime() + enc = DatetimeEncoder().fit(X) + assert enc.index_to_format_ == {0: "%Y-%m-%d %H:%M:%S%z"} + + +def test_resolution_none(): + X = get_datetime() + enc = DatetimeEncoder( + resolution=None, + add_total_seconds=False, ) enc.fit(X) - assert np.allclose(enc.transform(X), expected_result, equal_nan=True) - enc = DatetimeEncoder(add_day_of_the_week=False) - expected_result = np.array( - [ - [2020, 1, 1, 1, 2, 2020, 1, 3], - [2021, 2, 3, 2, 4, 2021, 2, 5], - [2022, 1, 1, 12, 25, 2022, 1, 3], - [2023, 2, 3, 2, 4, 2023, 2, 5], - ] + assert enc.index_to_features_ == {0: [], 1: [], 2: []} + assert enc.n_features_out_ == 0 + assert enc.get_feature_names_out() == [] + + +def test_transform_date(): + X = get_date() + enc = DatetimeEncoder( + add_total_seconds=False, ) - enc.fit(X) - assert np.allclose(enc.transform(X), expected_result, equal_nan=True) + X_trans = enc.fit_transform(X) - enc = DatetimeEncoder(add_day_of_the_week=True) expected_result = np.array( [ - [2020, 1, 1, 2, 1, 2, 3, 2020, 1, 3, 4], - [2021, 2, 3, 2, 2, 4, 1, 2021, 2, 5, 4], - [2022, 1, 1, 5, 12, 25, 4, 2022, 1, 3, 0], - [2023, 2, 3, 4, 2, 4, 1, 2023, 2, 5, 6], + [2020, 1, 1, 2020, 1, 2, 2020, 1, 3], + [2021, 2, 3, 2020, 2, 4, 2021, 2, 5], + [2022, 1, 1, 2020, 12, 25, 2022, 1, 3], + [2023, 2, 3, 2020, 2, 4, 2023, 2, 5], ] ) - enc.fit(X) - assert np.allclose(enc.transform(X), expected_result, equal_nan=True) + X_trans = enc.transform(X) + assert_array_equal(X_trans, expected_result) - # Datetimes - X = get_datetime_array()[:, 0].reshape(-1, 1) - enc = DatetimeEncoder(add_day_of_the_week=True) - # Check that the "total_time" feature is working - expected_result = np.array( + +def test_transform_datetime(): + X = get_datetime() + enc = DatetimeEncoder( + resolution="second", + add_total_seconds=False, + ) + X_trans = enc.fit_transform(X) + expected_X_trans = np.array( [ - [2020, 1, 1, 10, 0, 2], - [2021, 2, 3, 12, 0, 2], - [2022, 1, 1, 23, 0, 5], - [2023, 2, 3, 11, 0, 4], + [2020, 1, 1, 10, 12, 1, 2020, 1, 2, 10, 23, 0, 2020, 1, 3, 10, 0, 0], + [2021, 2, 3, 12, 45, 23, 2020, 2, 4, 22, 12, 0, 2021, 2, 5, 12, 0, 
0],
+            [2022, 1, 1, 23, 23, 43, 2020, 12, 25, 11, 12, 0, 2022, 1, 3, 11, 0, 0],
+            [2023, 2, 3, 11, 12, 12, 2020, 2, 4, 8, 32, 0, 2023, 2, 5, 23, 0, 0],
         ]
-    ).astype(np.float64)
-    # Time from epochs in seconds
-    expected_result[:, 4] = (X.astype("int64") // 1e9).astype(np.float64).reshape(-1)
+    )
+    assert_array_equal(X_trans, expected_X_trans)
 
-    enc.fit(X)
-    X_trans = enc.transform(X)
-    assert np.allclose(X_trans, expected_result, equal_nan=True)
-
-    # Check if we find back the date from the time to epoch
-    assert (
-        (
-            pd.to_datetime(X_trans[:, 4], unit="s") - pd.to_datetime(X.reshape(-1))
-        ).total_seconds()
-        == 0
-    ).all()
-
-    # Dirty datetimes
-    X = get_dirty_datetime_array()[:, 0].reshape(-1, 1)
-    enc = DatetimeEncoder(add_day_of_the_week=True)
-    expected_result = np.array(
+
+def test_transform_tz():
+    X = get_tz_datetime()
+    enc = DatetimeEncoder(
+        add_total_seconds=True,
+    )
+    X_trans = enc.fit_transform(X)
+    expected_X_trans = np.array(
         [
-            [2020, 1, 1, 10, 0, 2],
-            [np.nan] * 6,
-            [2022, 1, 1, 23, 0, 5],
-            [2023, 2, 3, 11, 0, 4],
+            [2020, 1, 1, 10, 1.57785372e09],
+            [2021, 2, 3, 12, 1.61233652e09],
+            [2022, 1, 1, 23, 1.64105962e09],
+            [2023, 2, 3, 11, 1.67540293e09],
         ]
     )
-    # Time from epochs in seconds
-    expected_result[:, 4] = (X.astype("int64") // 1e9).astype(np.float64).reshape(-1)
-    expected_result[1, 4] = np.nan
-    enc.fit(X)
-    X_trans = enc.transform(X)
-    assert np.allclose(X_trans, expected_result, equal_nan=True)
-
-    # Datetimes with TZ
-    # If the dates are timezone-aware, all the feature extractions should
-    # be done in the provided timezone.
-    # But the full time to epoch should correspond to the true number of
-    # seconds between epoch time and the time of the date.
-    X = get_datetime_with_TZ_array()
-    enc = DatetimeEncoder(add_day_of_the_week=True)
-    expected_result = np.array(
+    assert_allclose(X_trans, expected_X_trans)
+
+
+def test_transform_nan():
+    X = get_nan_datetime()
+    enc = DatetimeEncoder(
+        add_total_seconds=True,
+    )
+    X_trans = enc.fit_transform(X)
+    # 3 columns x ("year", "month", "day", "hour", "total_seconds")
+    expected_X_trans = np.array(
         [
-            [2020, 1, 1, 10, 0, 2],
-            [2021, 2, 3, 12, 0, 2],
-            [2022, 1, 1, 23, 0, 5],
-            [2023, 2, 3, 11, 0, 4],
+            [2020, 1, 1, 10, 1.57787352e09,
+             np.nan, np.nan, np.nan, np.nan, np.nan,
+             2020, 1, 3, 10, 1.57804560e09],
+            [np.nan, np.nan, np.nan, np.nan, np.nan,
+             2020, 2, 4, 22, 1.58085432e09,
+             2021, 2, 5, 12, 1.61252640e09],
+            [2022, 1, 1, 23, 1.64107942e09,
+             2020, 12, 25, 11, 1.60889472e09,
+             np.nan, np.nan, np.nan, np.nan, np.nan],
         ]
-    ).astype(np.float64)
-    # Time from epochs in seconds
-    expected_result[:, 4] = (
-        (X.iloc[:, 0].view(dtype="int64") // 1e9)
-        .astype(np.float64)
-        .to_numpy()
-        .reshape(-1)
     )
-    enc.fit(X)
-    X_trans = enc.transform(X)
-    assert np.allclose(X_trans, expected_result, equal_nan=True)
+    assert_allclose(X_trans, expected_X_trans)
+
+
+def test_mixed_type_dataframe():
+    X = get_mixed_type_dataframe()
+    enc = DatetimeEncoder().fit(X)
+    assert enc.index_to_format_ == {0: "%Y-%m-%d", 4: "%d/%m/%Y"}
+
+    X_dt = to_datetime(X)
+    expected_dtypes = [
+        np.dtype("<M8[ns]"),
+        np.dtype("O"),
+        np.dtype("int64"),
+        np.dtype("O"),
+        np.dtype("<M8[ns]"),
+        np.dtype("bool"),
+    ]
+    assert (X_dt.dtypes == expected_dtypes).all()
 
 
-def test_extract_until(extract_until) -> None:
-    time_levels = [
-        "year",
-        "month",
-        "day",
-        "hour",
-        "minute",
-        "second",
-        "microsecond",
-        "nanosecond",
-    ]
-    X = get_datetime_array()
-    enc = DatetimeEncoder(extract_until=extract_until)
-    expected_features_per_column_ = {
-        # all features after seconds are constant
-        # we want total_time if we have not extracted all non-constant features
-        0: time_levels[
-            :
min(time_levels.index(extract_until), time_levels.index("second")) + 1 - ] - + ( - ["total_time"] - if extract_until in ["year", "month", "day", "hour", "minute"] - else [] - ), - # constant after minute + year constant - 1: time_levels[ - 1 : min(time_levels.index(extract_until), time_levels.index("minute")) + 1 - ] - + (["total_time"] if extract_until in ["year", "month", "day", "hour"] else []), - # constant after hour - 2: time_levels[ - : min(time_levels.index(extract_until), time_levels.index("hour")) + 1 - ] - + (["total_time"] if extract_until in ["year", "month", "day"] else []), - } - enc.fit(X) - assert enc.features_per_column_ == expected_features_per_column_ - - -def test_extract_until_none() -> None: - X = get_dirty_datetime_array() - enc = DatetimeEncoder(extract_until=None) - expected_features_per_column_ = { - # all features after seconds are constant - # we want total_time if we have not extracted all non-constant features - 0: ["total_time"], - 1: ["total_time"], - 2: ["total_time"], - } - enc.fit(X) - assert enc.features_per_column_ == expected_features_per_column_ +def test_to_datetime_incorrect_skip(X): + assert_array_equal(to_datetime(X), X) - # check get_names_out - expected_feature_names = [ - "0_total_time", - "1_total_time", - "2_total_time", - ] - assert enc.get_feature_names_out() == expected_feature_names - # check with constant datetimes - X = get_constant_date_array() - enc = DatetimeEncoder(extract_until=None) - assert enc.fit_transform(X).shape[1] == 0 +def test_to_datetime_type_error(): + # 3d tensor + X = [[["2021-01-01"]]] + with pytest.raises(TypeError): + to_datetime(X) -def test_check_fitted_datetime_encoder() -> None: - """Test that calling transform before fit raises an error""" - X = get_datetime_array()[:, 0].reshape(-1, 1) - enc = DatetimeEncoder(add_day_of_the_week=True) - with pytest.raises(NotFittedError): - enc.transform(X) +def test_to_datetime_invalid_params(): + with pytest.raises(ValueError, match=r"(?=.*errors options)"): + to_datetime(2020, errors="skip") - # Check that it works after fit - enc.fit(X) - enc.transform(X) + with pytest.raises(ValueError, match=r"(?=.*not a parameter of skrub)"): + to_datetime(2020, unit="second") + + +@pytest.mark.skipif( + not _is_pandas_format_mixed_available(), + reason=MSG_MIN_PANDAS_SKIP, +) +def test_to_datetime_format_param(): + X_col = ["2021-01-01", "2021/01/01"] + + # without format (default) + out = to_datetime(X_col) + expected_out = np.array(["2021-01-01", "NaT"], dtype="datetime64[ns]") + assert_array_equal(out, expected_out) + + # with format + out = to_datetime(X_col, format="%Y/%m/%d") + expected_out = np.array(["NaT", "2021-01-01"], dtype="datetime64[ns]") + assert_array_equal(out, expected_out) + + +def test_mixed_datetime_format(): + df = get_mixed_datetime_format() + + df_dt = to_datetime(df) + expected_df_dt = pd.DataFrame( + dict( + a=[ + pd.Timestamp("2022-10-15"), + pd.Timestamp("2021-12-25"), + pd.Timestamp("2020-05-18"), + pd.Timestamp("2019-10-15 12:00:00"), + ] + ) + ) + assert_frame_equal(df_dt, expected_df_dt) + + series_dt = to_datetime(df["a"]) + expected_series_dt = expected_df_dt["a"] + assert_array_equal(series_dt, expected_series_dt) + + +@pytest.mark.skipif(not _is_pandas_format_mixed_available(), reason=MSG_MIN_PANDAS_SKIP) +def test_mix_of_unambiguous(): + X_col = ["2021/10/15", "01/14/2021"] + out = to_datetime(X_col) + expected_out = np.array( + [np.datetime64("2021-10-15"), np.datetime64("NaT")], + dtype="datetime64[ns]", + ) + assert_array_equal(out, 
expected_out) + + +def test_only_ambiguous(): + X_col = ["2021/10/10", "2020/01/02"] + out = to_datetime(X_col) + # monthfirst by default + expected_out = np.array(["2021-10-10", "2020-01-02"], dtype="datetime64[ns]") + assert_array_equal(out, expected_out) + + +def test_monthfirst_only(): + X_col = ["2021/02/02", "2021/01/15"] + out = to_datetime(X_col) + expected_out = np.array(["2021-02-02", "2021-01-15"], dtype="datetime64[ns]") + assert_array_equal(out, expected_out) diff --git a/skrub/tests/test_table_vectorizer.py b/skrub/tests/test_table_vectorizer.py index ffe41155b..333b4d7f9 100644 --- a/skrub/tests/test_table_vectorizer.py +++ b/skrub/tests/test_table_vectorizer.py @@ -8,9 +8,12 @@ from sklearn.utils.validation import check_is_fitted from skrub import GapEncoder, MinHashEncoder, SuperVectorizer, TableVectorizer +from skrub._datetime_encoder import _is_pandas_format_mixed_available from skrub._table_vectorizer import _infer_date_format from skrub.tests.utils import transformers_list_equal +MSG_PANDAS_DEPRECATED_WARNING = "Skip deprecation warning" + def check_same_transformers( expected_transformers: dict, actual_transformers: list @@ -788,7 +791,7 @@ def test_mixed_types() -> None: pd.DataFrame({"col1": [1.0, 2.0, np.nan]}), ), # All datetimes during fit, 1 category during transform - ( + pytest.param( pd.DataFrame( { "col1": [ @@ -816,6 +819,10 @@ def test_mixed_types() -> None: ] } ), + marks=pytest.mark.skipif( + not _is_pandas_format_mixed_available(), + reason=MSG_PANDAS_DEPRECATED_WARNING, + ), ), ], )
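Taken together, the `errors` and `format` semantics these tests pin down can be summarized in one snippet (a sketch mirroring `test_to_datetime_format_param`, so it assumes pandas >= 2.0 as the skip marker above does):

```python
from skrub import to_datetime

X_col = ["2021-01-01", "2021/01/01"]  # two clashing formats

# Default errors="coerce": entries that don't match the guessed format
# become NaT instead of raising.
print(to_datetime(X_col))
# ['2021-01-01T00:00:00.000000000' 'NaT']

# An explicit format flips which entry survives.
print(to_datetime(X_col, format="%Y/%m/%d"))
# ['NaT' '2021-01-01T00:00:00.000000000']
```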