Skip to content

Commit

Permalink
feat: decode v4.* notebooks
Browse files Browse the repository at this point in the history
Decoder for v4.4 is reused for all notebooks with major version 4, because
their differences do not affect how the notebooks are rendered:
- v4.5 requires that each cell has a unique ID
- versions < v4.3 do not have the 'code_cell.metadata.execution' field, which holds the code's execution time.
  They also do not have 'raw_cell.metadata.jupyter.source_hidden', which controls whether the source is hidden.
  This has a default behaviour in 'nb' and is probably not that important anyway.
  Finally, they lack the metadata.title field, which is currently not used in v4.4 notebooks either.
  • Loading branch information
bevzzz committed Feb 24, 2024
1 parent 40a335d commit 96767ed
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 3 deletions.
50 changes: 50 additions & 0 deletions decode/decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ func TestDecodeBytes(t *testing.T) {
json string
nCells int
}{
{
name: "v4.5",
json: `{
"nbformat": 4, "nbformat_minor": 5, "metadata": {}, "cells": [
{"id": "a", "cell_type": "markdown", "metadata": {}, "source": []},
{"id": "b", "cell_type": "markdown", "metadata": {}, "source": []}
]
}`,
nCells: 2,
},
{
name: "v4.4",
json: `{
Expand All @@ -47,6 +57,46 @@ func TestDecodeBytes(t *testing.T) {
}`,
nCells: 2,
},
{
name: "v4.3",
json: `{
"nbformat": 4, "nbformat_minor": 3, "metadata": {}, "cells": [
{"cell_type": "markdown", "metadata": {}, "source": []},
{"cell_type": "markdown", "metadata": {}, "source": []}
]
}`,
nCells: 2,
},
{
name: "v4.2",
json: `{
"nbformat": 4, "nbformat_minor": 2, "metadata": {}, "cells": [
{"cell_type": "markdown", "metadata": {}, "source": []},
{"cell_type": "markdown", "metadata": {}, "source": []}
]
}`,
nCells: 2,
},
{
name: "v4.1",
json: `{
"nbformat": 4, "nbformat_minor": 1, "metadata": {}, "cells": [
{"cell_type": "markdown", "metadata": {}, "source": []},
{"cell_type": "markdown", "metadata": {}, "source": []}
]
}`,
nCells: 2,
},
{
name: "v4.0",
json: `{
"nbformat": 4, "nbformat_minor": 0, "metadata": {}, "cells": [
{"cell_type": "markdown", "metadata": {}, "source": []},
{"cell_type": "markdown", "metadata": {}, "source": []}
]
}`,
nCells: 2,
},
} {
t.Run(tt.name, func(t *testing.T) {
nb, err := decode.Bytes([]byte(tt.json))
Expand Down
11 changes: 11 additions & 0 deletions schema/schema.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
// Package schema defines the common data format for elements of a Jupyter notebook.
//
// It is based on the [v4.4] definition, as it is stable and encompasses all the data
// necessary for accurate rendering. Note that schema validation is not a goal of this
// package, and so the interfaces defined here will often omit non-essential data,
// e.g. metadata or fields specific to the JupyterLab environment.
//
// [v4.4]: https://github.com/jupyter/nbformat/blob/main/nbformat/v4/nbformat.v4.4.schema.json
package schema

import (
Expand Down Expand Up @@ -34,6 +42,9 @@ type Cell interface {
Text() []byte
}

// HasAttachments is implemented by cells which include [cell attachments].
//
// [cell attachments]: https://nbformat.readthedocs.io/en/latest/format_description.html#cell-attachments
type HasAttachments interface {
// Attachments are only defined for v4.0 and above for markdown and raw cells
// and may be omitted in the JSON. Cells without attachments should return nil.
Expand Down
14 changes: 11 additions & 3 deletions schema/v4/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,21 @@ import (
)

func init() {
decode.RegisterDecoder(version, new(decoder))
decode.RegisterDecoder(schema.Version{Major: 4, Minor: 5}, new(decoder))
decode.RegisterDecoder(schema.Version{Major: 4, Minor: 4}, new(decoder))
decode.RegisterDecoder(schema.Version{Major: 4, Minor: 3}, new(decoder))
decode.RegisterDecoder(schema.Version{Major: 4, Minor: 2}, new(decoder))
decode.RegisterDecoder(schema.Version{Major: 4, Minor: 1}, new(decoder))
decode.RegisterDecoder(schema.Version{Major: 4, Minor: 0}, new(decoder))
}

var version = schema.Version{Major: 4, Minor: 4}

// decoder decodes cell contents and metadata for nbformat v4.4.
// Other 4.x versions can be decoded with the same decoder, as their schemas
// differ only in ways that do not affect how the notebook is rendered.
type decoder struct{}

// Compile-time check that *decoder implements decode.Decoder.
var _ decode.Decoder = (*decoder)(nil)

func (d *decoder) DecodeMeta(data []byte) (schema.NotebookMetadata, error) {
var nm NotebookMetadata
if err := json.Unmarshal(data, &nm); err != nil {
Expand Down

0 comments on commit 96767ed

Please sign in to comment.