Skip to content
This repository has been archived by the owner on Oct 2, 2024. It is now read-only.

Commit

Permalink
refactor(Dataset): Review dataset resource class (#158)
Browse files Browse the repository at this point in the history
* refactor(Dataset): Review dataset resource class

- `model.workspace_id` is used if present
- `name` getter/setter and remove _sync call
- `status` and `id` args removed from init since they are read-only

* tests: add more tests
  • Loading branch information
frascuchon authored May 10, 2024
1 parent 109e223 commit 6a2b4e6
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 14 deletions.
32 changes: 18 additions & 14 deletions src/argilla_sdk/datasets/_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,38 +42,42 @@ class Dataset(Resource):
# NOTE(review): this span is a rendered diff hunk without +/- markers. It appears to
# interleave removed and added lines (e.g. two `self.workspace_id` assignments and an
# unclosed `DatasetModel(` call), so it is not runnable as shown — confirm against the
# actual file before editing.
def __init__(
self,
name: Optional[str] = None,
status: Literal["draft", "ready"] = "draft",
workspace: Optional[Union["Workspace", str]] = None,
settings: Optional[Settings] = None,
# NOTE(review): this default is evaluated once, when the `def` statement runs, so an
# Argilla client is constructed at class-definition time and shared by every call
# that omits `client`.
client: Optional["Argilla"] = Argilla(),
id: Optional[Union[UUID, str]] = None,
_model: Optional[DatasetModel] = None,
) -> None:
"""Initializes a Dataset with a client and model
Args:
name (str): Name of the dataset. Replaced by random UUID if not assigned.
status ["draft", "ready"]: Status of the dataset
workspace_id (UUID): Workspace_id of the dataset
workspace (Workspace or str): Workspace of the dataset. Default is the first workspace found in the client.
settings (Settings): Settings class to be used to configure the dataset.
client (Argilla): Instance of Argilla to connect with the server.
id: (UUID): To predefine dataset_id or to reference existing datasets.
Random UUID is used if not assigned.
"""
# `client` is dereferenced here, so passing client=None raises AttributeError even
# though the annotation is Optional.
super().__init__(client=client, api=client.api.datasets)
if name is None:
name = str(id)
self.log(f"Settings dataset name to unique UUID: {id}")
self.workspace_id = self.__workspace_id_from_name(workspace=workspace)
_model = _model or DatasetModel(
name = f"dataset_{uuid4()}"
self.log(f"Settings dataset name to unique UUID: {name}")

# Prefer the workspace id carried by a supplied `_model`; otherwise resolve it from
# the `workspace` argument.
self.workspace_id = (
_model.workspace_id
if _model and _model.workspace_id
else self.__workspace_id_from_name(workspace=workspace)
)
# A supplied `_model` is used as-is; otherwise a new DatasetModel is built from the
# arguments.
self._model = _model or DatasetModel(
name=name,
status=status,
workspace_id=self._convert_optional_uuid(uuid=self.workspace_id),
id=self._convert_optional_uuid(uuid=id),
)
self._model = _model
self._settings = self.__configure_settings_for_dataset(settings=settings)
self.__records = DatasetRecords(client=self._client, dataset=self)
self._sync(model=self._model)

@property
def name(self) -> str:
    """Return the dataset name held by the underlying model."""
    model = self._model
    return model.name

@name.setter
def name(self, value: str) -> None:
    """Store ``value`` as the name on the underlying model.

    Only the in-memory model attribute is assigned here.
    """
    model = self._model
    model.name = value

@property
def records(self) -> "DatasetRecords":
Expand Down
24 changes: 24 additions & 0 deletions tests/integration/test_listing_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pytest

from argilla_sdk import Argilla, Dataset


@pytest.fixture
def client() -> Argilla:
    """Provide an Argilla client built with its default arguments."""
    argilla_client = Argilla()
    return argilla_client


class TestDatasetsList:
    """Integration checks for listing datasets through the client."""

    def test_list_datasets(self, client: Argilla):
        """Create a dataset, then check that it shows up in the client's listing."""
        expected = client.datasets("test_dataset")
        # Start from a clean slate: remove any leftover dataset before creating it.
        if expected.exists():
            expected.delete()
        expected.create()

        listing = client.datasets
        assert len(listing) > 0, "No datasets were found"

        # NOTE(review): if no listed dataset is named "test_dataset", this loop makes
        # no assertion and the test still passes.
        for listed in listing:
            if listed.name != "test_dataset":
                continue
            assert listed == expected, "The dataset was not loaded properly"

0 comments on commit 6a2b4e6

Please sign in to comment.