🚸 Only auto-search ._name_field on sub-classes of CanCurate (#2319)
falexwolf authored Jan 4, 2025
1 parent c0b61f5 commit 79e0b74
Showing 8 changed files with 42 additions and 34 deletions.
17 changes: 2 additions & 15 deletions docs/arrays.ipynb
@@ -58,26 +58,13 @@
"!lamin init --storage s3://lamindb-ci/test-array-notebook --name test-array-notebook"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"hide-output"
]
},
"outputs": [],
"source": [
"import lamindb as ln"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ln.settings.verbosity = \"info\""
"import lamindb as ln"
]
},
{
@@ -267,7 +254,7 @@
},
"outputs": [],
"source": [
"adata_subset.to_memory()"
"# adata_subset.to_memory()"
]
},
{
4 changes: 3 additions & 1 deletion docs/scripts/run-track-with-params.py
@@ -15,6 +15,8 @@
"normalization": "the_good_one",
},
}
ln.track("JjRF4mACd9m00001", params=params)
ln.track(params=params)

# your code

ln.finish()
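Params recorded via `ln.track(params=...)` can later be used to query runs. A minimal sketch, not part of this diff; the `Run.params.filter` accessor follows the lamindb parameter-tracking docs, and `learning_rate` is an assumed key of the params dict shown above:

```python
import lamindb as ln

# query runs by a tracked parameter value; "learning_rate" is assumed to be
# one of the keys in the params dict passed to ln.track() above
runs = ln.Run.params.filter(learning_rate=0.01)
print(runs.df())
```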
14 changes: 6 additions & 8 deletions docs/track.ipynb
@@ -46,9 +46,7 @@
"```python\n",
"import lamindb as ln\n",
"\n",
"# --> `ln.track()` generates a uid for your code\n",
"# --> `ln.track(uid)` initiates a tracked run\n",
"ln.track(\"9priar0hoE5u0000\")\n",
"ln.track() # initiate a tracked notebook/script run\n",
"\n",
"# your code\n",
"\n",
@@ -79,7 +77,7 @@
"In the API, filter {class}`~lamindb.Transform` to obtain a transform record:\n",
"\n",
"```python\n",
"transform = ln.Transform.get(name=\"Track notebooks & scripts\")\n",
"transform = ln.Transform.get(key=\"my_analyses/my_notebook.ipynb\")\n",
"transform.source_code # source code\n",
"transform.latest_run.report # report of latest run\n",
"transform.latest_run.environment # environment of latest run\n",
@@ -94,15 +92,15 @@
"On the hub, search or filter the `transform` page and then load a script or notebook on the CLI. For example,\n",
"\n",
"```bash\n",
"lamin load https://lamin.ai/laminlabs/lamindata/transform/13VINnFk89PE0004\n",
"lamin load https://lamin.ai/laminlabs/lamindata/transform/13VINnFk89PE\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sync scripts with GitHub"
"## Sync scripts with git"
]
},
{
@@ -348,7 +346,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "py312",
"language": "python",
"name": "python3"
},
@@ -362,7 +360,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.12.8"
},
"nbproject": {
"id": "9priar0hoE5u",
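Taken together, the notebook changes simplify the tracking pattern to the sketch below; the key "my_analyses/my_notebook.ipynb" is the placeholder used in the docs diff, not a real record:

```python
import lamindb as ln

ln.track()  # initiate a tracked notebook/script run; no explicit uid needed

# ... your analysis code ...

ln.finish()

# retrieve the transform later by its key (placeholder key from the docs diff)
transform = ln.Transform.get(key="my_analyses/my_notebook.ipynb")
transform.source_code             # source code
transform.latest_run.report       # report of the latest run
transform.latest_run.environment  # environment of the latest run
```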
20 changes: 17 additions & 3 deletions lamindb/_record.py
@@ -7,10 +7,17 @@

import dj_database_url
import lamindb_setup as ln_setup
from django.core.exceptions import FieldDoesNotExist
from django.core.exceptions import ValidationError as DjangoValidationError
from django.db import connections, transaction
from django.db.models import F, IntegerField, Manager, Q, QuerySet, TextField, Value
from django.db.models import (
F,
IntegerField,
Manager,
Q,
QuerySet,
TextField,
Value,
)
from django.db.models.functions import Cast, Coalesce
from django.db.models.lookups import (
Contains,
@@ -36,6 +36,7 @@
from lamindb.base.validation import FieldValidationError
from lamindb.models import (
Artifact,
CanCurate,
Collection,
Feature,
FeatureSet,
@@ -155,7 +163,13 @@ def __init__(record: Record, *args, **kwargs):
has_consciously_provided_uid = False
if "_has_consciously_provided_uid" in kwargs:
has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
if settings.creation.search_names and not has_consciously_provided_uid:
if (
isinstance(
record, (CanCurate, Collection, Transform)
) # Collection is only temporary because it'll get a key field
and settings.creation.search_names
and not has_consciously_provided_uid
):
name_field = getattr(record, "_name_field", "name")
match = suggest_records_with_similar_names(record, name_field, kwargs)
if match:
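The guard above restricts the similar-name search in `Record.__init__` to registries that subclass `CanCurate`, plus `Collection` and `Transform` (`Collection` only temporarily, until it gets a key field). A minimal sketch of the resulting behavior, not part of this diff, assuming a fresh instance; `ULabel` subclasses `CanCurate`:

```python
import lamindb as ln

ln.ULabel(name="My experiment").save()

# ULabel subclasses CanCurate, so constructing a record with a similar name
# still runs the ._name_field search; lamindb may log a suggestion or return
# the already-existing record
ln.ULabel(name="my experiment")

# registries that are not CanCurate sub-classes (and not Collection/Transform)
# now skip this search on instantiation
```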
2 changes: 1 addition & 1 deletion lamindb/core/_data.py
@@ -388,7 +388,7 @@ def add_labels(

def _track_run_input(
data: Artifact | Collection | Iterable[Artifact] | Iterable[Collection],
is_run_input: bool | None = None,
is_run_input: bool | Run | None = None,
run: Run | None = None,
):
# this is an internal hack right now for project-flow, but we can allow this
4 changes: 2 additions & 2 deletions lamindb/models.py
@@ -1710,7 +1710,7 @@ class Meta(Record.Meta, TracksRun.Meta):
"""The JSON-like value."""


class FeatureSet(Record, TracksRun):
class FeatureSet(Record, CanCurate, TracksRun):
"""Feature sets.
Stores references to sets of :class:`~lamindb.Feature` and other registries
@@ -1724,7 +1724,7 @@ class FeatureSet(Record, TracksRun):
your artifacts against one feature set and only need to store 1M
instead of 1M x 20k = 20B links.
2. Interpretation: Model protein panels, gene panels, etc.
3. Data integration: Feature sets provide the currency that determines whether two collections can be easily concatenated.
3. Data integration: Feature sets provide the information that determines whether two datasets can be meaningfully concatenated.
These reasons do not hold for label sets. Hence, LaminDB does not model label sets.
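Since `FeatureSet` now subclasses `CanCurate`, it falls under the auto-search rule above and gains the standard curation classmethods on its name field. A minimal sketch, not part of this diff; the set name "marker_panel_v1" is hypothetical and assumed to be saved already:

```python
import lamindb as ln

# validate feature-set names the same way ULabel or Feature names are validated;
# "marker_panel_v1" is a hypothetical, previously saved FeatureSet
ln.FeatureSet.validate(["marker_panel_v1", "typo_panel"], field="name")
# -> array([ True, False])  assuming only the first name is registered
```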
4 changes: 2 additions & 2 deletions tests/core/test_feature_set.py
@@ -78,11 +78,11 @@ def test_feature_set_from_values():
["weird_name"], field=ln.Feature.name, type="float"
)
with pytest.raises(ValidationError):
ln.FeatureSet.from_values([1], field=ln.ULabel.name, type="float")
ln.FeatureSet.from_values([1], field=ln.Feature.name, type="float")

# return none if no validated features
with pytest.raises(ValidationError):
ln.FeatureSet.from_values(["name"], field=ln.ULabel.name, type="float")
ln.FeatureSet.from_values(["name"], field=ln.Feature.name, type="float")


def test_feature_set_from_records(df):
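For contrast with the failing calls in the updated test, a minimal sketch of the passing path, not part of this diff; it assumes a clean instance and registers the feature before `from_values` validates it:

```python
import lamindb as ln

# register the feature first so that from_values can validate the name
ln.Feature(name="weird_name", dtype="float").save()

feature_set = ln.FeatureSet.from_values(
    ["weird_name"], field=ln.Feature.name, type="float"
)
feature_set.save()
```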
11 changes: 9 additions & 2 deletions tests/core/test_record.py
@@ -165,8 +165,15 @@ def test_suggest_similar_names():


def test_pass_version():
transform = ln.Transform(name="mytransform", version="1")
transform.save()
# creating a new transform on key bumps the version uid
# hence we'll get an error if we don't also increment the semantic version
ln.Transform(key="mytransform", version="1").save()
with pytest.raises(ValueError, match="Please increment the previous version"):
ln.Transform(key="mytransform", version="1")
# creating a new transform on name retrieves the same transform
# upon re-naming to description, this will be unintuitive, but I fear
# we need it nonetheless to maintain backward-compat
transform = ln.Transform(name="mytransform", version="1").save()
assert ln.Transform(name="mytransform", version="1") == transform


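Spelled out as a usage sketch, the versioning rule the updated test encodes looks as follows; this mirrors the test and is not an addition to the commit:

```python
import lamindb as ln

# first creation on a key succeeds
ln.Transform(key="mytransform", version="1").save()

# re-creating on the same key bumps the version uid, so keeping version="1" raises;
# bumping the semantic version (e.g. version="2") would be the assumed way out
# ln.Transform(key="mytransform", version="1")  # ValueError: "Please increment the previous version"

# constructing on name with the same version retrieves the already-saved transform
transform = ln.Transform(name="mytransform", version="1").save()
assert ln.Transform(name="mytransform", version="1") == transform
```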
