Skip to content

Commit

Permalink
Merge pull request #1292 from xl0/trailing-n
Browse files Browse the repository at this point in the history
nbdev_clean: Add trailing newlines to mask diff between Jupyter and VSCode
  • Loading branch information
jph00 authored Feb 9, 2023
2 parents 6df9d69 + 2600419 commit 29c9652
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 21 deletions.
1 change: 1 addition & 0 deletions nbdev/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
'git_url': 'https:/fastai/nbdev',
'lib_path': 'nbdev'},
'syms': { 'nbdev.clean': { 'nbdev.clean._add_jupyter_hooks': ('api/clean.html#_add_jupyter_hooks', 'nbdev/clean.py'),
'nbdev.clean._add_trailing_n': ('api/clean.html#_add_trailing_n', 'nbdev/clean.py'),
'nbdev.clean._clean_cell': ('api/clean.html#_clean_cell', 'nbdev/clean.py'),
'nbdev.clean._clean_cell_output': ('api/clean.html#_clean_cell_output', 'nbdev/clean.py'),
'nbdev.clean._clean_cell_output_id': ('api/clean.html#_clean_cell_output_id', 'nbdev/clean.py'),
Expand Down
35 changes: 20 additions & 15 deletions nbdev/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,20 +55,25 @@ def _clean_cell_output_id(lines):
return _skip_or_sub(lines) if isinstance(lines,str) else [_skip_or_sub(o) for o in lines]

# %% ../nbs/api/11_clean.ipynb 11
def _add_trailing_n(img):
if not isinstance(img,str): return [ _add_trailing_n(o) for o in img ]
return img + "\n" if img[-1] != "\n" else img

# %% ../nbs/api/11_clean.ipynb 12
def _clean_cell_output(cell, clean_ids):
"Remove `cell` output execution count and optionally ids from text reprs"
outputs = cell.get('outputs', [])
for o in outputs:
if 'execution_count' in o: o['execution_count'] = None
data = o.get('data', {})
data.pop("application/vnd.google.colaboratory.intrinsic+json", None)
if clean_ids:
for k in data:
if k.startswith('text'): data[k] = _clean_cell_output_id(data[k])
if 'text' in o: o['text'] = _clean_cell_output_id(o['text'])
for k in data:
if k.startswith('text') and clean_ids: data[k] = _clean_cell_output_id(data[k])
if k.startswith('image'): data[k] = _add_trailing_n(data[k])
if 'text' in o and clean_ids: o['text'] = _clean_cell_output_id(o['text'])
o.get('metadata', {}).pop('tags', None)

# %% ../nbs/api/11_clean.ipynb 12
# %% ../nbs/api/11_clean.ipynb 13
def _clean_cell(cell, clear_all, allowed_metadata_keys, clean_ids):
"Clean `cell` by removing superfluous metadata or everything except the input if `clear_all`"
if 'execution_count' in cell: cell['execution_count'] = None
Expand All @@ -79,7 +84,7 @@ def _clean_cell(cell, clear_all, allowed_metadata_keys, clean_ids):
cell['metadata'] = {} if clear_all else {
k:v for k,v in cell['metadata'].items() if k in allowed_metadata_keys}

# %% ../nbs/api/11_clean.ipynb 13
# %% ../nbs/api/11_clean.ipynb 14
def clean_nb(
nb, # The notebook to clean
clear_all=False, # Remove all cell metadata and cell outputs?
Expand All @@ -97,12 +102,12 @@ def clean_nb(
nb['metadata']['kernelspec']['display_name'] = nb.metadata.kernelspec.name
nb['metadata'] = {k:v for k,v in nb['metadata'].items() if k in metadata_keys}

# %% ../nbs/api/11_clean.ipynb 24
# %% ../nbs/api/11_clean.ipynb 27
def _reconfigure(*strms):
for s in strms:
if hasattr(s,'reconfigure'): s.reconfigure(encoding='utf-8')

# %% ../nbs/api/11_clean.ipynb 25
# %% ../nbs/api/11_clean.ipynb 28
def process_write(warn_msg, proc_nb, f_in, f_out=None, disp=False):
if not f_out: f_out = f_in
if isinstance(f_in, (str,Path)): f_in = Path(f_in).open()
Expand All @@ -115,15 +120,15 @@ def process_write(warn_msg, proc_nb, f_in, f_out=None, disp=False):
warn(f'{warn_msg}')
warn(e)

# %% ../nbs/api/11_clean.ipynb 26
# %% ../nbs/api/11_clean.ipynb 29
def _nbdev_clean(nb, path=None, clear_all=None):
    "Run `clean_nb` on `nb` using the settings from the config looked up with `path`"
    cfg = get_config(path=path)
    # A falsy `clear_all` argument defers to the configured value.
    if not clear_all: clear_all = cfg.clear_all
    # Both settings are whitespace-separated lists of metadata key names.
    nb_keys,cell_keys = [cfg.get(k).split() for k in ("allowed_metadata_keys", "allowed_cell_metadata_keys")]
    return clean_nb(nb, clear_all, nb_keys, cell_keys, cfg.clean_ids)

# %% ../nbs/api/11_clean.ipynb 27
# %% ../nbs/api/11_clean.ipynb 31
@call_parse
def nbdev_clean(
fname:str=None, # A notebook name or glob to clean
Expand All @@ -139,15 +144,15 @@ def nbdev_clean(
if fname is None: fname = get_config().nbs_path
for f in globtastic(fname, file_glob='*.ipynb', skip_folder_re='^[_.]'): _write(f_in=f, disp=disp)

# %% ../nbs/api/11_clean.ipynb 30
# %% ../nbs/api/11_clean.ipynb 34
def clean_jupyter(path, model, **kwargs):
    "Clean Jupyter `model` pre save to `path`"
    # Only nbformat-4 notebooks are handled; any other model is left untouched.
    is_v4_notebook = model['type']=='notebook' and model['content']['nbformat']==4
    if not is_v4_notebook: return
    get_config.cache_clear() # Allow config changes without restarting Jupyter
    if get_config(path=path).jupyter_hooks: _nbdev_clean(model['content'], path=path)

# %% ../nbs/api/11_clean.ipynb 33
# %% ../nbs/api/11_clean.ipynb 37
_pre_save_hook_src = '''
def nbdev_clean_jupyter(**kwargs):
try: from nbdev.clean import clean_jupyter
Expand All @@ -157,7 +162,7 @@ def nbdev_clean_jupyter(**kwargs):
c.ContentsManager.pre_save_hook = nbdev_clean_jupyter'''.strip()
_pre_save_hook_re = re.compile(r'c\.(File)?ContentsManager\.pre_save_hook')

# %% ../nbs/api/11_clean.ipynb 34
# %% ../nbs/api/11_clean.ipynb 38
def _add_jupyter_hooks(src, path):
if _pre_save_hook_src in src: return
mod = ast.parse(src)
Expand All @@ -175,12 +180,12 @@ def _add_jupyter_hooks(src, path):
if src: src+='\n\n'
return src+_pre_save_hook_src

# %% ../nbs/api/11_clean.ipynb 38
# %% ../nbs/api/11_clean.ipynb 42
def _git_root():
    "`Path` built from the output of `git rev-parse --show-toplevel`, or `None` if running it raises `OSError`"
    try: toplevel = run('git rev-parse --show-toplevel')
    except OSError: return None
    return Path(toplevel)

# %% ../nbs/api/11_clean.ipynb 41
# %% ../nbs/api/11_clean.ipynb 45
@call_parse
def nbdev_install_hooks():
"Install Jupyter and git hooks to automatically clean, trust, and fix merge conflicts in notebooks"
Expand Down
50 changes: 45 additions & 5 deletions nbs/api/11_clean.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,18 @@
"test_eq(_clean_cell_output_id('foo\\n<function _add2 at 0x7f8252378820>\\nbar'), 'foo\\n<function _add2>\\nbar')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#|exporti\n",
"def _add_trailing_n(img):\n",
" if not isinstance(img,str): return [ _add_trailing_n(o) for o in img ]\n",
" return img + \"\\n\" if img[-1] != \"\\n\" else img"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -158,10 +170,10 @@
" if 'execution_count' in o: o['execution_count'] = None\n",
" data = o.get('data', {})\n",
" data.pop(\"application/vnd.google.colaboratory.intrinsic+json\", None)\n",
" if clean_ids:\n",
" for k in data:\n",
" if k.startswith('text'): data[k] = _clean_cell_output_id(data[k])\n",
" if 'text' in o: o['text'] = _clean_cell_output_id(o['text'])\n",
" for k in data:\n",
" if k.startswith('text') and clean_ids: data[k] = _clean_cell_output_id(data[k])\n",
" if k.startswith('image'): data[k] = _add_trailing_n(data[k])\n",
" if 'text' in o and clean_ids: o['text'] = _clean_cell_output_id(o['text'])\n",
" o.get('metadata', {}).pop('tags', None)"
]
},
Expand Down Expand Up @@ -208,6 +220,27 @@
" nb['metadata'] = {k:v for k,v in nb['metadata'].items() if k in metadata_keys}"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Jupyter adds a trailing <code>\\n</code> to images in cell outputs. Vscode-jupyter does not.\\\n",
"Notebooks should be brought to a common style to avoid unnecessary diffs:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test_nb = read_nb('../../tests/image.ipynb')\n",
"assert test_nb.cells[0].outputs[0].data['image/png'][-1] != \"\\n\" # Make sure it was not converted by acccident\n",
"clean_nb(test_nb)\n",
"assert test_nb.cells[0].outputs[0].data['image/png'][-1] == \"\\n\""
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -357,6 +390,13 @@
" return clean_nb(nb, clear_all, allowed_metadata_keys, allowed_cell_metadata_keys, cfg.clean_ids)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -819,7 +859,7 @@
"split_at_heading": true
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "torch",
"language": "python",
"name": "python3"
}
Expand Down
2 changes: 1 addition & 1 deletion nbs/tutorials/tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -974,7 +974,7 @@
{
"data": {
"image/svg+xml": [
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"100\"><circle cx=\"50\" cy=\"50\" r=\"40\"/></svg>"
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"100\"><circle cx=\"50\" cy=\"50\" r=\"40\"/></svg>\n"
],
"text/plain": [
"<IPython.core.display.SVG object>"
Expand Down
35 changes: 35 additions & 0 deletions tests/image.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAAgAAAAICAIAAABLbSncAAAAE0lEQVR4nGNkaGDACpiwCw9WCQBqCACQJ5at+QAAAABJRU5ErkJggg==",
"text/plain": [
"<PIL.Image.Image image mode=RGB size=8x8>"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from PIL import Image\n",
"Image.new(mode='RGB', size=(8, 8), color=\"green\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "torch",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 29c9652

Please sign in to comment.