From b4b47a938d58d52841afd5d481f365bfe8d442dc Mon Sep 17 00:00:00 2001
From: Nathan Molinier <nathan.molinier@gmail.com>
Date: Fri, 12 Jan 2024 15:47:46 -0500
Subject: [PATCH 1/7] Update dataset-curation.md

---
 data/dataset-curation.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/data/dataset-curation.md b/data/dataset-curation.md
index 24586155..ceb635e5 100644
--- a/data/dataset-curation.md
+++ b/data/dataset-curation.md
@@ -551,12 +551,12 @@ sci-bordeaux
                 └── anat
                     ├── sub-001_acq-sag_T2w_label-SC_seg.nii.gz  # spinal cord (SC) binary segmentation 
                     ├── sub-001_acq-sag_T2w_label-SC_softseg.nii.gz  # spinal cord (SC) soft segmentation
-                    ├── sub-001_acq-sag_T2w_label-discs_dlabel.nii.gz  # discrete discs labeling (SC) soft segmentation
+                    ├── sub-001_acq-sag_T2w_label-discs_dlabel.nii.gz  # discrete discs labeling
                     ├── sub-001_acq-sag_T2w_label-vertebrae_dseg  # vertebrae discrete segmentation (segmented stuctures have different values based on the vertebral levels)
                     ├── sub-001_acq-sag_T2w_label-rootlets_dseg  # nerve rootlets discrete segmentation (segmented stuctures have different values based on the spinal level)
-                    ├── sub-001_acq-sag_T2w_label-compression_label.nii.gz # binary compression labeling (compression levels are located using only binary labels)
-                    ├── sub-001_acq-sag_T2w_label-PMJ_dlabel # single point-wise label of pmj with value 50
-                    └── sub-001_acq-sag_T2w_label-lesion_seg # binary lesion segmentation (the related disease is here SCI base on the name of the dataset)
+                    ├── sub-001_acq-sag_T2w_label-compression_label.nii.gz  # binary compression labeling (compression levels are indicated as a single voxel with a value '1' at the point of compression)
+                    ├── sub-001_acq-sag_T2w_label-PMJ_dlabel  # Pontomedullary junction is indicated as a single voxel with a value '50'
+                    └── sub-001_acq-sag_T2w_label-lesion_seg  # lesion binary segmentation (the associated disease could be SCI, MS, etc. and is indicated in the file participants.tsv)
 
 ```
 

From be3819fb4fd06d7891c2257d391f1b3b01507907 Mon Sep 17 00:00:00 2001
From: Nathan Molinier <nathan.molinier@gmail.com>
Date: Fri, 12 Jan 2024 16:16:06 -0500
Subject: [PATCH 2/7] Explain dataset_description.json and folder use

---
 data/dataset-curation.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/data/dataset-curation.md b/data/dataset-curation.md
index ceb635e5..60f9bacc 100644
--- a/data/dataset-curation.md
+++ b/data/dataset-curation.md
@@ -303,10 +303,11 @@ In this section we decided not to fully follow the BIDS derivatives convention.
 ```
 
 ```{warning}
-Derivative data obtained using DIFFERENT processes/workflows should be stored using DIFFERENT derivatives folders. Eg:
+Derivative data obtained using different processes/workflows should ideally be stored using different derivatives folders. Eg:
 - `derivatives/labels/`
 - `derivatives/sct_5.6/`
 - `derivatives/fmriprep_2.3/`
+However, to streamline data identification and reduce the need for extensive folder crawling, we [opted](https://github.com/neuropoly/data-management/issues/282) to gather common labels, such as binary segmentation and point-wise labeling, into the same derivative folder called labels.
 ```
 
 ```{note}
@@ -400,6 +401,8 @@ In addition to the subjects folders, derived datasets must include their own `da
 }
 ```
 
+The field `GeneratedBy` has to be used to name the different functions and processes used to generate the data.
+
 ```{warning}
 The `dataset_description.json` file within the derived dataset should include `"DatasetType": "derivative"`.
 ```

From f6f64f00c87d6ab88d5fc6a3e3a0ba3d3e106b52 Mon Sep 17 00:00:00 2001
From: Nathan Molinier <nathan.molinier@gmail.com>
Date: Fri, 12 Jan 2024 17:05:04 -0500
Subject: [PATCH 3/7] Update data/dataset-curation.md

Co-authored-by: Julien Cohen-Adad <jcohen@polymtl.ca>
---
 data/dataset-curation.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/dataset-curation.md b/data/dataset-curation.md
index 60f9bacc..905abdab 100644
--- a/data/dataset-curation.md
+++ b/data/dataset-curation.md
@@ -554,7 +554,7 @@ sci-bordeaux
                 └── anat
                     ├── sub-001_acq-sag_T2w_label-SC_seg.nii.gz  # spinal cord (SC) binary segmentation 
                     ├── sub-001_acq-sag_T2w_label-SC_softseg.nii.gz  # spinal cord (SC) soft segmentation
-                    ├── sub-001_acq-sag_T2w_label-discs_dlabel.nii.gz  # discrete discs labeling
+                    ├── sub-001_acq-sag_T2w_label-discs_dlabel.nii.gz  # discrete discs labeling using the following convention: https://spinalcordtoolbox.com/user_section/tutorials/vertebral-labeling/labeling-conventions.html
                     ├── sub-001_acq-sag_T2w_label-vertebrae_dseg  # vertebrae discrete segmentation (segmented stuctures have different values based on the vertebral levels)
                     ├── sub-001_acq-sag_T2w_label-rootlets_dseg  # nerve rootlets discrete segmentation (segmented stuctures have different values based on the spinal level)
                     ├── sub-001_acq-sag_T2w_label-compression_label.nii.gz  # binary compression labeling (compression levels are indicated as a single voxel with a value '1' at the point of compression)

From 3042c39ddb49407b3cb33a179f7865bf8bc02474 Mon Sep 17 00:00:00 2001
From: Nathan Molinier <nathan.molinier@gmail.com>
Date: Mon, 15 Jan 2024 14:16:46 -0500
Subject: [PATCH 4/7] Add more cases JSON sidecars

---
 data/dataset-curation.md | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/data/dataset-curation.md b/data/dataset-curation.md
index 905abdab..069426fe 100644
--- a/data/dataset-curation.md
+++ b/data/dataset-curation.md
@@ -424,7 +424,25 @@ JSON sidecars are companion files linked to data files. They share the same file
 Therefore, to improve the way we track our data, `.json` sidecars will have to be generated for each data present in derived datasets. Here are few examples of JSON sidecar:
 
 <details>
-<summary>JSON sidecar (ORIGINAL SPACE)</summary>
+<summary>JSON sidecar (Manually created in the ORIGINAL SPACE)</summary>
+    
+```json
+{
+    "SpatialReference": "orig",
+    "GeneratedBy": [
+        {
+            "Name": "Manual",
+            "Author": "Nathan Molinier",
+            "Date": "2023-07-14 13:43:10"
+        }
+    ]
+}
+```
+
+</details>
+
+<details>
+<summary>JSON sidecar (Data automatically created then manually corrected in the ORIGINAL SPACE)</summary>
     
 ```json
 {
@@ -446,7 +464,7 @@ Therefore, to improve the way we track our data, `.json` sidecars will have to b
 </details>
 
 <details>
-<summary>JSON sidecar (RESAMPLED and CROPPED)</summary>
+<summary>JSON sidecar (Data RESAMPLED and CROPPED)</summary>
     
 ```json
 {
@@ -480,7 +498,7 @@ Because the space used for the derived data is different from the original raw d
 </details>
 
 <details>
-<summary>JSON sidecar (PAM50 SPACE)</summary>
+<summary>JSON sidecar (Data moved to the PAM50 SPACE)</summary>
     
 ```json
 {

From eea86c6b36c7f34161f52fe930416bad8c87874a Mon Sep 17 00:00:00 2001
From: Nathan Molinier <nathan.molinier@gmail.com>
Date: Mon, 15 Jan 2024 17:24:01 -0500
Subject: [PATCH 5/7] Update dataset-curation.md

---
 data/dataset-curation.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/dataset-curation.md b/data/dataset-curation.md
index 069426fe..b9ade43e 100644
--- a/data/dataset-curation.md
+++ b/data/dataset-curation.md
@@ -307,7 +307,7 @@ Derivative data obtained using different processes/workflows should ideally be s
 - `derivatives/labels/`
 - `derivatives/sct_5.6/`
 - `derivatives/fmriprep_2.3/`
-However, to streamline data identification and reduce the need for extensive folder crawling, we [opted](https://github.com/neuropoly/data-management/issues/282) to gather common labels, such as binary segmentation and point-wise labeling, into the same derivative folder called labels.
+However, to streamline data identification and reduce the need for extensive folder crawling, we [opted](https://github.com/neuropoly/data-management/issues/282) to gather common labels, such as binary segmentation and point-wise labeling, into the same derivative folder called labels. For particular project, having a separe derived folder can still be envisioned.
 ```
 
 ```{note}

From 7c64dff4d6267adb467f947bed97ac7b5f947b3e Mon Sep 17 00:00:00 2001
From: jcohenadad <jcohen@polymtl.ca>
Date: Tue, 16 Jan 2024 15:39:38 -0500
Subject: [PATCH 6/7] Cleanup of raw section

---
 data/dataset-curation.md | 130 +++++++++++++++------------------------
 1 file changed, 49 insertions(+), 81 deletions(-)

diff --git a/data/dataset-curation.md b/data/dataset-curation.md
index b9ade43e..386b30eb 100644
--- a/data/dataset-curation.md
+++ b/data/dataset-curation.md
@@ -2,48 +2,60 @@
 
 ## Converting data to BIDS
 
-All git-annex datasets should be BIDS-compliant. For more information about the BIDS standard, please visit [http://bids.neuroimaging.io](http://bids.neuroimaging.io). 
+All git-annex datasets should be BIDS-compliant. For more information about the BIDS standard, please visit [http://bids.neuroimaging.io](http://bids.neuroimaging.io). For some examples of BIDS datasets, visit [this page](https://github.com/bids-standard/bids-examples). A quick way to verify compliance with the convention is this [online BIDS validator](https://bids-standard.github.io/bids-validator/).
 
-When you receive data from an external collaborator, you can save them under a temporary location: `duke/temp`.
+When you receive raw data from an external collaborator, save them under a temporary location on one of NeuroPoly's servers, e.g.: `duke/temp`.
 
 Then, inspect the data and convert them to BIDS. It is recommended to write a script that does the conversion. The 
 script should then be saved under the `code` folder of the final dataset. Some previous scripts can be found on 
 [GitHub](https://github.com/neuropoly/data-management/tree/master/scripts) or under the `code` folder of already existing datasets.
 
-Once the data are converted to BIDS and [uploaded](git-datasets.md#upload) to git-annex repository, delete the temporary folder to save space.
+```{important}
+Once the data are converted to BIDS and [uploaded](git-datasets.md#upload) to the git-annex repository, please delete the temporary folder.
+```
 
 ## Building the `raw` dataset
 
-> [Brackets] are characterizing optional informations
+The `raw` dataset corresponds to the core dataset that contains all the different acquisitions generated for one or several subjects. **NO** postprocessing steps should be applied to these acquisitions.
 
-The `raw` dataset corresponds to the core dataset that contains all the different acquisition generated for one or several subjects. **NO** postprocessing steps should be applied to these acquisitions.
+Subjects folders in the `raw` dataset are structured as follows for MRI, with folders corresponding to subjects, [sessions] and MRI modalities:
 
-### Folders structure and filenames
+### Raw structure
 
-Subjects folders in the `raw` dataset are structured as follows for MRI, with folders corresponding to subjects, [sessions] and MRI modalities:
+Useful BIDS specifications are:
+- [File naming conventions](https://bids-specification.readthedocs.io/en/stable/02-common-principles.html#filesystem-structure), 
+- [Modality-agnostic conventions](https://bids-specification.readthedocs.io/en/stable/03-modality-agnostic-files.html#code),
+- [MRI-specific conventions](https://bids-specification.readthedocs.io/en/stable/04-modality-specific-files/01-magnetic-resonance-imaging-data.html),
+- [Microscopy-specific conventions](https://bids-specification.readthedocs.io/en/stable/04-modality-specific-files/10-microscopy.html)
 
-#### Raw structure
+The example below applies for MRI data:
 
 ```
-sub-<label>/
-    [ses-<label>/]
-        anat/
-            sub-<label>[_ses-<label>][_acq-<label>][_ce-<label>][_rec-<label>][_run-<index>][_part-<mag|phase|real|imag>]_<suffix>.json
-            sub-<label>[_ses-<label>][_acq-<label>][_ce-<label>][_rec-<label>][_run-<index>][_part-<mag|phase|real|imag>]_<suffix>.nii[.gz]
-        dwi/
-            sub-<label>[_ses-<label>][_acq-<label>][_rec-<label>][_dir-<label>][_run-<index>][_part-<mag|phase|real|imag>]_dwi.bval
-            sub-<label>[_ses-<label>][_acq-<label>][_rec-<label>][_dir-<label>][_run-<index>][_part-<mag|phase|real|imag>]_dwi.bvec
-            sub-<label>[_ses-<label>][_acq-<label>][_rec-<label>][_dir-<label>][_run-<index>][_part-<mag|phase|real|imag>]_dwi.json
-            sub-<label>[_ses-<label>][_acq-<label>][_rec-<label>][_dir-<label>][_run-<index>][_part-<mag|phase|real|imag>]_dwi.nii[.gz]
+├── README
+├── dataset_description.json
+├── participants.tsv
+├── participants.json
+├── code/
+│   └── curate.py
+├── sub-<label>/
+│   └── [ses-<label>/]
+│        ├── anat/
+│        │   ├── sub-<label>[_ses-<label>][_acq-<label>][_ce-<label>][_rec-<label>][_run-<index>][_part-<mag|phase|real|imag>]_<suffix>.json
+│        │   └── sub-<label>[_ses-<label>][_acq-<label>][_ce-<label>][_rec-<label>][_run-<index>][_part-<mag|phase|real|imag>]_<suffix>.nii[.gz]
+│        └── dwi/
+│            ├── sub-<label>[_ses-<label>][_acq-<label>][_rec-<label>][_dir-<label>][_run-<index>][_part-<mag|phase|real|imag>]_dwi.bval
+│            ├── sub-<label>[_ses-<label>][_acq-<label>][_rec-<label>][_dir-<label>][_run-<index>][_part-<mag|phase|real|imag>]_dwi.bvec
+│            ├── sub-<label>[_ses-<label>][_acq-<label>][_rec-<label>][_dir-<label>][_run-<index>][_part-<mag|phase|real|imag>]_dwi.json
+│            └── sub-<label>[_ses-<label>][_acq-<label>][_rec-<label>][_dir-<label>][_run-<index>][_part-<mag|phase|real|imag>]_dwi.nii[.gz]
 ```
 
 ```{note}
-Data collected from actual subjects goes under their specific sub-folder
+[Brackets] indicate optional information.
 ```
 
-#### Subject naming convention
+### Subject naming convention
 
-**Basic convention**: sub-XXX
+**Basic convention**: `sub-XXX`
 
 Example:
 
@@ -52,7 +64,7 @@ sub-001
 sub-002
 ```
 
-**Multi-institution/Multi-pathology convention**: sub-\<site>\<pathology>XXX
+**Multi-institution/Multi-pathology convention**: `sub-<site><pathology>XXX`
 
 Example of Multi-institution dataset:
 
@@ -71,10 +83,8 @@ sub-torHC001       # tor stands for Toronto and HC stands for Healthy Controls
 sub-zurSCI001      # zur stands for Zurich and SCI stands for Spinal Cord Injury
 ```
 
-Regarding BIDS filenames, they are constructed using 3 types of elements:
-
 
-#### Raw entities
+### Raw entities
 
 Characterized by a key word (sub, ses, acq, etc.) and a value (label = an alphanumeric value, index = a nonnegative integer, etc) separated with a dash `-`
 - `sub-<label>`
@@ -98,7 +108,7 @@ Examples of special cases below:
 If you to combine several above mentioned tags, use camelCase. For example, `sub-001_acq-cspineSag_T1w.nii.gz`.
 ```
 
-#### Raw suffixes
+### Raw suffixes
 
 An alphanumeric string located after all the entities following a final underscore `_` (i.e. the `<suffix>`). This suffix corresponds for MRI to the MRI contrast:
 - `T1w`
@@ -109,7 +119,7 @@ An alphanumeric string located after all the entities following a final undersco
 Only **ONE** suffix can be used within the filename.
 
 
-#### Raw extensions
+### Raw extensions
 
  Files extensions:
 - `.nii.gz`
@@ -117,32 +127,8 @@ Only **ONE** suffix can be used within the filename.
 - `.bval`
 - etc.
 
-#### Other modalities
-
-Many kinds of data have a place specified for them by BIDS. See [file naming conventions](https://bids-specification.readthedocs.io/en/stable/02-common-principles.html#filesystem-structure) and the [MRI](https://bids-specification.readthedocs.io/en/stable/04-modality-specific-files/01-magnetic-resonance-imaging-data.html) and [Microscopy](https://bids-specification.readthedocs.io/en/stable/04-modality-specific-files/10-microscopy.html) extensions for full details.
-
-
-### Raw template
-
-⚠️ In addition to the subjects folders, every `raw` dataset must include the following files: 
-
-```
-├── README
-├── dataset_description.json
-├── participants.tsv
-├── participants.json
-├── code/
-│   └── curate.py
-├── sub-XXX
-│   └── anat
-│       └──sub-XXX_T1w.nii.gz
- ...
-```
-
-For details, see [BIDS specification](https://bids-specification.readthedocs.io/en/stable/03-modality-agnostic-files.html#code).
-For examples, see [BIDS examples](https://github.com/bids-standard/bids-examples).
 
-#### `README`
+### `README`
 
 The [`README`](https://bids-specification.readthedocs.io/en/stable/03-modality-agnostic-files.html#readme) is a [markdown](https://markdown-guide.readthedocs.io/en/latest/index.html) file describing the dataset in more detail.
 
@@ -165,7 +151,7 @@ Dataset shared by: <NAME AND EMAIL>
 <LIST HERE MISSING SUBJECTS>
 ```
 
-#### `dataset_description.json`
+### `dataset_description.json`
 
 The [`dataset_description.json`](https://bids-specification.readthedocs.io/en/stable/03-modality-agnostic-files.html#dataset_descriptionjson) is a JSON file describing the dataset.
 
@@ -182,26 +168,12 @@ Please use the `dataset_description.json` template below:
 ```{note}
 Refer to the [BIDS spec](https://bids-specification.readthedocs.io/) to know what version to fill in here.
 ```
-    
- ```{warning}
-The `dataset_description.json` file within the top-level dataset should include `"DatasetType": "raw"`.
- ```
 
 
-#### `participants.tsv`
+### `participants.tsv`
 
-The [`participants.tsv`](https://bids-specification.readthedocs.io/en/stable/03-modality-agnostic-files.html#participants-file) is a TSV file and should include at least the following columns:
+The [`participants.tsv`](https://bids-specification.readthedocs.io/en/stable/03-modality-agnostic-files.html#participants-file) is a tab-separated values (TSV) file that lists all subjects in the dataset with useful metadata. Please start off from the example below:
 
-| participant_id | source_id | species | age | sex | pathology  | institution |
-| ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- |
-| sub-001 | 001 | homo sapiens | 30 | F | HC | montreal |
-| sub-002 | 005 | homo sapiens | 40 | O | MS | montreal |
-| sub-003 | 007 | homo sapiens  | n/a | n/a | MS | toronto |
-
-Authorized values for `pathology` are listed under [`participants.json`](#participantsjson).
-
-Please use the `participants.tsv` template below:
-    
 ```
 participant_id	source_id	species	age	sex	pathology	institution
 sub-001	001	homo sapiens	30	F	HC	montreal
@@ -209,18 +181,14 @@ sub-002	005	homo sapiens	40	O	MS	montreal
 sub-003	007	homo sapiens	n/a	n/a	MS	toronto
 ```
 
-Other columns may be added if the data exists to fill them and it would be useful to keep.
-
-```{warning}
-Indicate missing values with `n/a` (for "not available"), not by empty cells!
-```
-
-```{warning}
-This is a Tab-Separated-Values file. Make sure to use tabs between entries if editing with a text editor. Most spreadsheet software can read and write .tsv correctly.
-```
+Additional notes:
+- Authorized values for `pathology` are listed under [`participants.json`](#participantsjson).
+- Indicate missing values with `n/a` (for "not available"), not by empty cells!
+- In the example above, the apparent misalignment between the column names (e.g. `pathology`) and their values is only a display artifact of the tab characters
+- Other columns can be added if the metadata are relevant
 
 
-#### `participants.json`
+### `participants.json`
 
 The [`participants.json`](https://bids-specification.readthedocs.io/en/stable/03-modality-agnostic-files.html#participants-file) is a JSON file providing a legend for the columns in `participants.tsv`, with longer descriptions, units, and in the case of categorical variables, allowed levels. Please use the template below:
 
@@ -273,7 +241,7 @@ The [`participants.json`](https://bids-specification.readthedocs.io/en/stable/03
 }
 ```
 
-#### `code/`
+### `code/`
 
 The data cleaning and curation script(s) that create the `sub-XXX/` folders should be kept with them, under the [`code/`](https://bids-specification.readthedocs.io/en/stable/03-modality-agnostic-files.html#code) folder. Within reason, every dataset should have a script that when run like
 
@@ -286,7 +254,7 @@ unpacks, converts and renames all the images and related files in `path/to/sourc
 This program should be committed first, before the curated data it produces. Afterwards, every commit that modifies the code should also re-run it, and the code and re-curated data should be committed in tandem.
 
 ```{note}
-Analysis scripts should not be kept here. Keep them in separate repositories, usually in public on GitHub, with instructions about. See [PIPELINE-DOC](TODO-PIPELINE-DOC).
+Analysis scripts should not be kept here. Keep them in separate repositories, usually public on GitHub, with instructions on how to run them.
 ```
 
 

From 45fc8a72a9959e9043a98d23134f3efe7dc9b230 Mon Sep 17 00:00:00 2001
From: jcohenadad <jcohen@polymtl.ca>
Date: Tue, 16 Jan 2024 16:23:06 -0500
Subject: [PATCH 7/7] Clarified derivatives section

---
 data/dataset-curation.md | 196 +++++++++++++++++----------------------
 1 file changed, 84 insertions(+), 112 deletions(-)

diff --git a/data/dataset-curation.md b/data/dataset-curation.md
index 386b30eb..0291916b 100644
--- a/data/dataset-curation.md
+++ b/data/dataset-curation.md
@@ -42,6 +42,8 @@ The example below applies for MRI data:
 │        ├── anat/
 │        │   ├── sub-<label>[_ses-<label>][_acq-<label>][_ce-<label>][_rec-<label>][_run-<index>][_part-<mag|phase|real|imag>]_<suffix>.json
 │        │   └── sub-<label>[_ses-<label>][_acq-<label>][_ce-<label>][_rec-<label>][_run-<index>][_part-<mag|phase|real|imag>]_<suffix>.nii[.gz]
+│        ├── fmap/
+│        ├── func/
 │        └── dwi/
 │            ├── sub-<label>[_ses-<label>][_acq-<label>][_rec-<label>][_dir-<label>][_run-<index>][_part-<mag|phase|real|imag>]_dwi.bval
 │            ├── sub-<label>[_ses-<label>][_acq-<label>][_rec-<label>][_dir-<label>][_run-<index>][_part-<mag|phase|real|imag>]_dwi.bvec
@@ -264,54 +266,58 @@ First, it is important to understand what are [BIDS derivatives](https://bids-sp
 
 > Derivatives are outputs of common processing pipelines, capturing data and meta-data sufficient for a researcher to understand and (critically) reuse those outputs in subsequent processing. Standardizing derivatives is motivated by use cases where formalized machine-readable access to processed data enables higher level processing.
 
-Basically, derivative folders are derived datasets generated from a raw dataset. They must include **ONLY** processed data obtained from a specific raw dataset (i.e. segmentations, masks, labels...).
+Derivative folders are derived datasets generated from a raw dataset. They must include **ONLY** processed data obtained from a specific raw dataset (e.g., segmentations, masks, labels).
 
 ```{warning}
 In this section we decided not to fully follow the BIDS derivatives convention. For more information please see our related [issue](https://github.com/neuropoly/data-management/issues/282). 
 ```
 
-```{warning}
+
+### Derivatives structure
+
+According to BIDS, derived datasets could be stored inside a parent folder [`derivatives/`](https://bids-specification.readthedocs.io/en/stable/common-principles.html#storage-of-derived-datasets) _"to make a clear distinction between raw data and results of data processing"_. This folder should also follow the same folder logic as the one used for the `raw` data.
+
 Derivative data obtained using different processes/workflows should ideally be stored using different derivatives folders. Eg:
 - `derivatives/labels/`
 - `derivatives/sct_5.6/`
 - `derivatives/fmriprep_2.3/`
-However, to streamline data identification and reduce the need for extensive folder crawling, we [opted](https://github.com/neuropoly/data-management/issues/282) to gather common labels, such as binary segmentation and point-wise labeling, into the same derivative folder called labels. For particular project, having a separe derived folder can still be envisioned.
-```
-
-```{note}
-According to BIDS, derived datasets could be stored inside a parent folder [`derivatives/`](https://bids-specification.readthedocs.io/en/stable/common-principles.html#storage-of-derived-datasets) _"to make a clear distinction between raw data and results of data processing"_. This folder should also follow the same folder logic as the one used for the `raw` data.
-```
-
-### Folders structure and filenames
-
-Here, we describe how the `derivative` folder should be organized.
 
 ```{note}
-In the guideline below, [brackets] refer to optional items.
+Despite what is written above, to streamline data identification and reduce the need for extensive folder crawling, we [opted](https://github.com/neuropoly/data-management/issues/282) for common folder names, such as `labels/`, that typically contain binary segmentations and point-wise labels.
 ```
 
-#### Derivatives structure
-
 Derived datasets follow the **same structure and hierarchy** as the `raw` dataset, with folders corresponding to subjects, [sessions] and MRI modalities:
 
 ```
-sub-<label>/
-    [ses-<label>/]
-        modality/
-            <source_filename>[_space-<space>][_res-<label>][_den-<label>][_desc-<label>]_<suffix>.<extension>
+├── README
+├── dataset_description.json
+├── participants.tsv
+├── participants.json
+├── code/
+├── sub-<label>/
+└── derivatives/
+    └── <label>                  <-- name of the label derivatives
+        └── sub-<label>/
+            └── [ses-<label>/]
+                └── modality/   <-- could be 'anat', 'fmap', 'func', etc.
+                    └── <source_filename>[_space-<space>][_res-<label>][_den-<label>][_desc-<label>]_<suffix>.<extension>
 ```
 
-Regarding derivatives filenames, we can identify the same 3 type of elements as before (entities, suffixes and extensions) plus 1 extra-consideration related to the raw data:
-
 ```{warning}
 Entities and suffixes are different from those used with the raw filenames and are specific to [data types](https://bids-specification.readthedocs.io/en/stable/derivatives/imaging.html#imaging-data-types).
 ```
 
-#### `<source_filename>`
+```{note}
+Because derived datasets are datasets, files and folders presented in the raw template section could also be included in this dataset (e.g. README, code/, etc.)
+```
+
+
+### `<source_filename>`
 
 This element corresponds to the entire source filename, with the **omission** of the extension. For example, if the source file name is `sub-02_acq-MTon_MTS.nii.gz`, the `<source_filename>` to be used for the derivatives is `sub-02_acq-MTon_MTS`.
 
-#### Derivative entities
+
+### Derivative entities
 
 Characterized by a key word (space, res, den, etc.) and a value (label = an alphanumeric value, index = a nonnegative integer, etc) separated with a dash `-`
 - `[space-<space>]`: image space if different from raw space: template space (e.g. MNI305 etc), orig, other etc. (see [BIDS](https://bids-specification.readthedocs.io/en/stable/derivatives/common-data-types.html#spatial-references))
@@ -322,7 +328,8 @@ Characterized by a key word (space, res, den, etc.) and a value (label = an alph
 
 Entities are then separated using underscores `_`
 
-#### Derivative suffixes
+
+### Derivative suffixes
 
 An alphanumeric string located after all the entities following a final underscore `_` :
 
@@ -335,64 +342,45 @@ An alphanumeric string located after all the entities following a final undersco
 |`dlabel`| `label-<label>` | Suffix used for discrete labels representing multiple anatomical structures. The entity is used to specify the atlas used to label the different structures |
 
 ```{warning}
-The corresponding entity `label-<label>` is here mandatory to specify the labeled region !
+Here, the corresponding entity `label-<label>` is mandatory to specify the labeled region.
 ```
 
-#### Derivatives extensions
+
+### Derivatives extensions
 
  Files extensions:
 - `.nii.gz`
 - `.json`
 - etc.
 
-### Derivative template
-
 In addition to the subjects folders, derived datasets must include their own `dataset_description.json` file to track all the processing steps used to create the data. Example:
 
-#### `dataset_description.json`
+
+### `derivatives/labels/dataset_description.json`
     
 ```json
 {
     "BIDSVersion": "1.9.0",
     "Name": "<dataset_name>",
-    "DatasetType": "derivative",
-    "GeneratedBy": [
-        {
-            "Name": "sct_deepseg_sc",
-            "Version": "SCT v6.1"
-        },
-        {
-            "Name": "Manual",
-            "Description": "Manually corrected by Nathan Molinier and Pierre-Louis Benveniste."
-        }
-    ]
+    "DatasetType": "derivative"
 }
 ```
 
-The field `GeneratedBy` has to be used to name the different functions and processes used to generate the data.
 
-```{warning}
-The `dataset_description.json` file within the derived dataset should include `"DatasetType": "derivative"`.
-```
+### `derivatives/labels/descriptions.tsv`
 
-```{note}
-If more details about the processing steps used have to be provided (e.g., reorientation, resampling etc.), a [`descriptions.tsv`](https://bids-specification.readthedocs.io/en/stable/derivatives/common-data-types.html#descriptionstsv) file may be added at the root of the folder. This file must contain at least two columns:
+To provide more details about the processing steps (e.g., reorientation, resampling), a [`descriptions.tsv`](https://bids-specification.readthedocs.io/en/stable/derivatives/common-data-types.html#descriptionstsv) file may be added at the root of the folder. This file must contain at least two columns:
 - `desc_id`: contains all the labels used with the [desc](https://bids-specification.readthedocs.io/en/stable/appendices/entities.html#desc) entity within the filenames accross the entire dataset.
 - `description`: human readable descriptions
-```
 
-```{note}
-Because derived datasets are datasets, files and folders presented in the raw template section could also be included in this dataset (e.g. README.md, code/, etc.)
-```
 
 ### JSON sidecars
 
 JSON sidecars are companion files linked to data files. They share the same filenames but have a ".json" extension. These files store essential metadata, serving as guidebooks to provide crucial details about the associated data, ensuring organized and comprehensive information.
 
-Therefore, to improve the way we track our data, `.json` sidecars will have to be generated for each data present in derived datasets. Here are few examples of JSON sidecar:
+Therefore, to improve the way we track our data, `.json` sidecars have to be generated for each data file present in derived datasets. Here are a few examples of JSON sidecars:
 
-<details>
-<summary>JSON sidecar (Manually created in the ORIGINAL SPACE)</summary>
+Below is a JSON sidecar describing a fully manual label created in the ORIGINAL SPACE:
     
 ```json
 {
@@ -407,10 +395,7 @@ Therefore, to improve the way we track our data, `.json` sidecars will have to b
 }
 ```
 
-</details>
-
-<details>
-<summary>JSON sidecar (Data automatically created then manually corrected in the ORIGINAL SPACE)</summary>
+If the label was previously produced by an automatic algorithm, append to the `GeneratedBy` section:
     
 ```json
 {
@@ -429,10 +414,7 @@ Therefore, to improve the way we track our data, `.json` sidecars will have to b
 }
 ```
 
-</details>
-
-<details>
-<summary>JSON sidecar (Data RESAMPLED and CROPPED)</summary>
+If the label is created _after_ the data was resampled and cropped, indicate it under `SpatialReference`:
     
 ```json
 {
@@ -459,15 +441,8 @@ Therefore, to improve the way we track our data, `.json` sidecars will have to b
 }
 ```
 
-```{warning}
-Because the space used for the derived data is different from the original raw data, the entity `space-other` **MUST** also be used in the filename.
-```
+Another example, for a label created in a different space than the image (here: the PAM50 template):
 
-</details>
-
-<details>
-<summary>JSON sidecar (Data moved to the PAM50 SPACE)</summary>
-    
 ```json
 {
     "SpatialReference": "PAM50",
@@ -481,74 +456,71 @@ Because the space used for the derived data is different from the original raw d
 ```
 
 ```{warning}
-Because the space used for the derived data is different from the original raw data, the entity `space-template` or `space-PAM50` **MUST** also be used in the filename.
+For better clarity, if the image space is different between the raw data and the label (as is the case above), the entity `space-other` **MUST** also be used in the filename.
 ```
 
-</details>
 
-```{note}
-If the image space is different from the original image, the entity `space-<label>` has to be used. The entity `space-template` may be used for templates and `space-other` for other transformations.
-```
 
-### Regions of interest and atlases
+### Label names
 
 To be consistent regarding the way anatomical regions will be referred to, please follow this table (based on the BIDS [labels](https://bids-specification.readthedocs.io/en/stable/derivatives/imaging.html#common-image-derived-labels)):
 
-| Abbreviation (label) | Description |
-| :---: | :---: |
+| label | Description |
+| --- | --- |
 | SC | Spinal Cord |
 | GM | Gray Matter |
 | WM | White Matter |
-| lesion | Lesion (MS, SCI etc.) |
-| discs | Intervertebral discs |
-| vertebrae | Vertebrae |
+| discs | Intervertebral discs, with values following [this convention](https://spinalcordtoolbox.com/user_section/tutorials/vertebral-labeling/labeling-conventions.html) |
+| vertebrae | Vertebrae, with values following [this convention](https://spinalcordtoolbox.com/user_section/tutorials/vertebral-labeling/labeling-conventions.html) |
 | rootlets | Spinal rootlets |
-| PMJ | Pontomedullary Junction |
+| PMJ | Pontomedullary Junction, indicated as a single voxel with a value '50' |
 | CSF | Cerebrospinal Fluid |
-| compression | Spinal Cord Compression |
+| compression | Spinal Cord Compression, indicated as a single voxel with a value '1' at the point of compression. There can be more than one compression. |
+| lesion | Lesion (e.g., multiple sclerosis plaques, spinal cord injury lesions). The pathology associated with the lesion is indicated in the file `participants.tsv` |
 | tumor | Tumor |
 | edema | Edema |
 | cavity | Cavity |
-| axon | Axon |
-| myelin | Myelin |
+| axon | Axon (used in microscopy datasets) |
+| myelin | Myelin (used in microscopy datasets) |
 
 When multiple anatomical regions are present in the image, atlases should be used. When specified, these atlases **SHOULD** be added to a folder `atlases/` at the root of the derivative folder or a URL should be included inside the json sidecars.
 
+
 ### Examples and use cases
 
-Let's consider a dataset with one single subject `sub-001`. Here is an example of the structure of the final dataset:
+Here is an example of a dataset structure with a single subject `sub-001`:
 
 ```
 sci-bordeaux
-    ├── README.md
-    ├── dataset_description.json
-    ├── participants.tsv
-    ├── participants.json
-    ├── code/
-    │   └── curate.py
-    │
-    ├── sub-001
-    │   └── anat
-    │       ├──sub-001_acq-sag_T2w.nii.gz
-    │       └──sub-001_acq-sag_T2w.json
-    │
-    └── derivatives
-        └── labels
-            ├── dataset_description.json
-            ├── README.md
-            └── sub-001
-                └── anat
-                    ├── sub-001_acq-sag_T2w_label-SC_seg.nii.gz  # spinal cord (SC) binary segmentation 
-                    ├── sub-001_acq-sag_T2w_label-SC_softseg.nii.gz  # spinal cord (SC) soft segmentation
-                    ├── sub-001_acq-sag_T2w_label-discs_dlabel.nii.gz  # discrete discs labeling using the following convention: https://spinalcordtoolbox.com/user_section/tutorials/vertebral-labeling/labeling-conventions.html
-                    ├── sub-001_acq-sag_T2w_label-vertebrae_dseg  # vertebrae discrete segmentation (segmented stuctures have different values based on the vertebral levels)
-                    ├── sub-001_acq-sag_T2w_label-rootlets_dseg  # nerve rootlets discrete segmentation (segmented stuctures have different values based on the spinal level)
-                    ├── sub-001_acq-sag_T2w_label-compression_label.nii.gz  # binary compression labeling (compression levels are indicated as a single voxel with a value '1' at the point of compression)
-                    ├── sub-001_acq-sag_T2w_label-PMJ_dlabel  # Pontomedullary junction is indicated as a single voxel with a value '50'
-                    └── sub-001_acq-sag_T2w_label-lesion_seg  # lesion binary segmentation (the associated disease could be SCI, MS, etc. and is indicated in the file participants.tsv)
-
+├── README
+├── dataset_description.json
+├── participants.tsv
+├── participants.json
+├── code/
+│   └── curate.py
+│
+├── sub-001
+│   └── anat
+│       ├──sub-001_acq-sag_T2w.nii.gz
+│       └──sub-001_acq-sag_T2w.json
+│
+└── derivatives
+    └── labels
+        ├── dataset_description.json
+        ├── README
+        └── sub-001
+            └── anat
+                ├── sub-001_acq-sag_T2w_label-SC_seg.nii.gz  # spinal cord (SC) binary segmentation 
+                ├── sub-001_acq-sag_T2w_label-SC_softseg.nii.gz  # spinal cord (SC) soft segmentation
+                ├── sub-001_acq-sag_T2w_label-discs_dlabel.nii.gz  # discrete discs labeling
+                ├── sub-001_acq-sag_T2w_label-vertebrae_dseg  # vertebrae discrete segmentation (segmented structures have different values based on the vertebral levels)
+                ├── sub-001_acq-sag_T2w_label-rootlets_dseg  # nerve rootlets discrete segmentation
+                ├── sub-001_acq-sag_T2w_label-compression_label.nii.gz  # binary compression labeling
+                ├── sub-001_acq-sag_T2w_label-PMJ_dlabel  # Pontomedullary junction, indicated as a single voxel with a value '50'
+                └── sub-001_acq-sag_T2w_label-lesion_seg  # lesion binary segmentation
 ```
 
+
 ## Changelog policy
 
 We use `git log` to track our changes. That means care should be taken to [write good messages](../geek-tips/git.md#commit-message-convention): they are there to help both you and future researchers understand how the dataset evolved.