Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Visualization of selection-based response stats in Task Review #1193

Merged
merged 3 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/web/docs/guides/how_to_use/review_app/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,9 @@ _Note that a custom view of Task results is included (at the bottom) only if you
![Submission reject dialog](./screenshots/submission_reject_dialog.png)
<br/>
<br/>

### Task statistics

![Task statistics](./screenshots/task_stats.png)
<br/>
<br/>
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
30 changes: 26 additions & 4 deletions docs/web/docs/guides/how_to_use/review_app/server_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,12 @@ Get all available tasks (to select one for review)
{
"tasks": [
{
"created_at": <timestamp>,
"has_stats": <bool>,
"id": <int>,
"name": <str>,
"is_reviewed": <bool>,
"unit_count": <int>,
"created_at": <timestamp>
"name": <str>,
"unit_count": <int>
},
... // more tasks
]
Expand Down Expand Up @@ -90,6 +91,27 @@ Serve a single composed file with reviewed task results (API response is a file

---

### `GET /api/tasks/{id}/{n_units}/stats-results`

Assemble stats with results for a Task.

```
{
"stats": {
<str>: {
<str>: <str> | <int>,
...
},
...
},
"task_id": <str>,
"task_name": <str>,
"workers_count": <int>
}
```

---

### `GET /api/tasks/{id}/worker-units-ids`

Get full, unpaginated list of unit IDs within a task (for subsequent client-side grouping by worker_id and `GET /task-units` pagination)
Expand Down Expand Up @@ -310,7 +332,7 @@ Get list of all granted qualifications for a worker

---

### `GET /api/stats?{task_id=}{worker_id=}{since=}{limit=}`
### `GET /api/review-stats?{task_id=}{worker_id=}{since=}{limit=}`

Get stats of (recent) approvals. Either `task_id` or `worker_id` (or both) must be present.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ mephisto:
task_title: "Dynamic form-based Tasks for Mturk"
task_description: "In this Task, we use dynamic FormComposer feature."
task_reward: 0.05
task_tags: "dynamic,form,testing"
task_tags: "dynamic,form,testing,form-composer"
assignment_duration_in_seconds: 3600
force_rebuild: true
max_num_concurrent_units: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ mephisto:
task_title: "Dynamic form-based Tasks for Prolifik"
task_description: "In this Task, we use dynamic FormComposer feature."
task_reward: 70
task_tags: "test,simple,form"
task_tags: "test,simple,form,form-composer"
force_rebuild: true
max_num_concurrent_units: 1
provider:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@ mephisto:
task_title: "Example how to easily create dynamic form-based Tasks"
task_description: "In this Task, we use FormComposer feature."
task_reward: 0
task_tags: "test,dynamic,form"
task_tags: "test,dynamic,form,form-composer"
force_rebuild: true
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ mephisto:
task_title: "Dynamic form-based Tasks with expiring URLs for Prolifik"
task_description: "In this Task, we use dynamic FormComposer feature with presigned S3 URLs."
task_reward: 70
task_tags: "test,simple,form"
task_tags: "test,simple,form,form-composer"
force_rebuild: true
max_num_concurrent_units: 1
provider:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@ mephisto:
task_title: "Example how to easily create simple form-based Tasks"
task_description: "In this Task, we use FormComposer feature."
task_reward: 0
task_tags: "test,simple,form"
task_tags: "test,simple,form,form-composer"
force_rebuild: true
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ mephisto:
task_title: "Form-task"
task_description: "Auto-generated form"
task_reward: 0
task_tags: "simple,form,generated"
task_tags: "simple,form,generated,form-composer"
110 changes: 110 additions & 0 deletions mephisto/generators/form_composer/stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/usr/bin/env python3

# Copyright (c) Meta Platforms and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from typing import List

from mephisto.data_model.task import Task
from mephisto.data_model.unit import Unit
from mephisto.tools.data_browser import DataBrowser

# Form field types that are collected for histograms
# (see `_get_unit_fields_for_histogram`, which filters fields by this list)
FIELD_TYPES_FOR_HISTOGRAM = ["radio", "checkbox", "select"]


def _get_unit_data(data_browser: DataBrowser, unit: Unit) -> dict:
    """
    Read the stored data of a unit and return its inputs and outputs
    under the `unit_inputs` and `unit_outputs` keys.

    A missing or empty `inputs`/`outputs` value is replaced with an empty dict.
    """
    payload = data_browser.get_data_from_unit(unit).get("data", {})

    extracted = {}
    for result_key, payload_key in (("unit_inputs", "inputs"), ("unit_outputs", "outputs")):
        value = payload.get(payload_key) or {}
        # Outdated code may wrap the actual data into `final_submission`
        # under `inputs` and `outputs` keys; in that case we use the value
        # inside `final_submission`
        if "final_submission" in value:
            value = value["final_submission"]
        extracted[result_key] = value

    return extracted


def _get_unit_fields_for_histogram(unit_inputs: dict) -> dict:
fields = {}
form_data = unit_inputs["form"]
for section in form_data["sections"]:
for fieldset in section["fieldsets"]:
for row in fieldset["rows"]:
for field in row["fields"]:
if field["type"] in FIELD_TYPES_FOR_HISTOGRAM:
fields[field["name"]] = field
return fields


def _update_data_for_histogram(data: dict, fields: dict, unit_outputs: dict) -> dict:
for field_name, field in fields.items():
histogram_name = field["label"]
prev_histogram_value = data.get(histogram_name, {})

field_options_to_dict = {o["value"]: o["label"] for o in field["options"]}

is_multiple = field.get("multiple") is True

unit_field_result = unit_outputs.get(field_name)

# Radio
if isinstance(unit_field_result, dict):
unit_field_result = [k for k, v in unit_field_result.items() if v is True]
is_multiple = True

unit_field_result = unit_field_result if is_multiple else [unit_field_result]
for option_value, option_name in field_options_to_dict.items():
prev_option_value = prev_histogram_value.get(option_name, 0)

plus_worker = 1 if option_value in unit_field_result else 0
prev_histogram_value[option_name] = prev_option_value + plus_worker

data[histogram_name] = prev_histogram_value

return data


def collect_task_stats(task: Task) -> dict:
    """
    Assemble histogram data for all units of a Task.

    :param task: Task whose units are inspected.
    :return: dict with
        `stats` - histogram data (field label -> option label -> count),
        `task_id` - `db_id` of the Task,
        `task_name` - name of the Task,
        `workers_count` - number of distinct workers among the Task's units.
    """
    data_for_histogram = {}

    data_browser = DataBrowser(db=task.db)

    units: List[Unit] = task.db.find_units(task_id=task.db_id)
    for unit in units:
        unit_data = _get_unit_data(data_browser, unit)
        unit_fields_for_histogram = _get_unit_fields_for_histogram(unit_data["unit_inputs"])
        data_for_histogram = _update_data_for_histogram(
            data_for_histogram,
            unit_fields_for_histogram,
            unit_data["unit_outputs"],
        )

    # NOTE(review): assumes `worker_id` is None for units that have no worker assigned;
    # such units must not be counted as one more worker
    worker_ids = {u.worker_id for u in units if u.worker_id is not None}

    return {
        "stats": data_for_histogram,
        "task_id": task.db_id,
        "task_name": task.task_name,
        "workers_count": len(worker_ids),
    }


def check_task_has_fields_for_stats(task: Task) -> bool:
    """
    Tell whether at least one unit of the Task contains form fields
    that can be used for histograms.

    Units are inspected one by one and the check stops at the first match.
    """
    browser = DataBrowser(db=task.db)
    task_units: List[Unit] = task.db.find_units(task_id=task.db_id)

    # Generator keeps the evaluation lazy, so units after the first match are not read
    return any(
        _get_unit_fields_for_histogram(_get_unit_data(browser, task_unit)["unit_inputs"])
        for task_unit in task_units
    )
Loading
Loading