From 2e79229042b859a817c4e75f54deb4b62ad294c5 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 12 Jan 2026 00:10:23 -0600 Subject: [PATCH 01/10] feat: Add external storage and filepath migration functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add migrate_external() and migrate_filepath() to datajoint.migrate module for safe migration of 0.x external storage columns to 2.0 JSON format. Migration strategy: 1. Add new _v2 columns with JSON type 2. Copy and convert data from old columns 3. User verifies data accessible via DataJoint 2.0 4. Finalize: rename columns (old → _v1, new → original) This allows 0.x and 2.0 to coexist during migration and provides rollback capability if issues are discovered. Functions: - migrate_external(schema, dry_run=True, finalize=False) - migrate_filepath(schema, dry_run=True, finalize=False) - _find_external_columns(schema) - detect 0.x external columns - _find_filepath_columns(schema) - detect 0.x filepath columns Co-Authored-By: Claude Opus 4.5 --- src/datajoint/migrate.py | 555 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 555 insertions(+) diff --git a/src/datajoint/migrate.py b/src/datajoint/migrate.py index 3a2bf2ce6..b72896d0a 100644 --- a/src/datajoint/migrate.py +++ b/src/datajoint/migrate.py @@ -8,6 +8,7 @@ from __future__ import annotations +import json import logging import re from typing import TYPE_CHECKING @@ -19,6 +20,14 @@ logger = logging.getLogger(__name__.split(".")[0]) +# Patterns for detecting 0.x external storage columns +EXTERNAL_PATTERNS = { + "blob": re.compile(r":external(?:-([a-zA-Z_][a-zA-Z0-9_]*))?:", re.I), + "attach": re.compile(r":external-attach(?:-([a-zA-Z_][a-zA-Z0-9_]*))?:", re.I), +} + +FILEPATH_PATTERN = re.compile(r":filepath(?:-([a-zA-Z_][a-zA-Z0-9_]*))?:", re.I) + # Pattern to detect blob types BLOB_TYPES = re.compile(r"^(tiny|small|medium|long|)blob$", re.I) @@ -450,3 +459,549 @@ def add_job_metadata_columns(target, dry_run: bool = True) -> dict: result["details"].append(table_detail) return result + + +# ============================================================================= +# External Storage Migration (Phase 6) +# ============================================================================= + + +def _find_external_columns(schema: Schema) -> list[dict]: + """ + Find columns using 0.x external storage format. + + Returns list of dicts with column info and detected store name. 
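+
+    Example entry (illustrative values; the keys mirror the dicts built below)::
+
+        {
+            "table_name": "recording",
+            "column_name": "trace",
+            "column_type": "binary(16)",
+            "comment": ":external-raw: raw voltage trace",
+            "store_name": "raw",
+            "external_type": "blob",
+        }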
+ """ + connection = schema.connection + results = [] + + # Get all tables (excluding hidden tables) + tables_query = """ + SELECT TABLE_NAME + FROM information_schema.TABLES + WHERE TABLE_SCHEMA = %s + AND TABLE_TYPE = 'BASE TABLE' + AND TABLE_NAME NOT LIKE '~%%' + """ + tables = connection.query(tables_query, args=(schema.database,)).fetchall() + + for (table_name,) in tables: + # Find BINARY(16) columns (0.x external storage format) + columns_query = """ + SELECT COLUMN_NAME, COLUMN_TYPE, COLUMN_COMMENT + FROM information_schema.COLUMNS + WHERE TABLE_SCHEMA = %s + AND TABLE_NAME = %s + AND DATA_TYPE = 'binary' + AND CHARACTER_MAXIMUM_LENGTH = 16 + """ + columns = connection.query( + columns_query, args=(schema.database, table_name) + ).fetchall() + + for column_name, column_type, comment in columns: + comment = comment or "" + + # Check for external blob pattern + blob_match = EXTERNAL_PATTERNS["blob"].search(comment) + if blob_match: + store_name = blob_match.group(1) or "external" + results.append({ + "table_name": table_name, + "column_name": column_name, + "column_type": column_type, + "comment": comment, + "store_name": store_name, + "external_type": "blob", + }) + continue + + # Check for external attach pattern + attach_match = EXTERNAL_PATTERNS["attach"].search(comment) + if attach_match: + store_name = attach_match.group(1) or "external" + results.append({ + "table_name": table_name, + "column_name": column_name, + "column_type": column_type, + "comment": comment, + "store_name": store_name, + "external_type": "attach", + }) + + return results + + +def _find_filepath_columns(schema: Schema) -> list[dict]: + """ + Find columns using 0.x filepath format. + + Returns list of dicts with column info and detected store name. + """ + connection = schema.connection + results = [] + + # Get all tables (excluding hidden tables) + tables_query = """ + SELECT TABLE_NAME + FROM information_schema.TABLES + WHERE TABLE_SCHEMA = %s + AND TABLE_TYPE = 'BASE TABLE' + AND TABLE_NAME NOT LIKE '~%%' + """ + tables = connection.query(tables_query, args=(schema.database,)).fetchall() + + for (table_name,) in tables: + # Find VARCHAR columns with :filepath: in comment + columns_query = """ + SELECT COLUMN_NAME, COLUMN_TYPE, COLUMN_COMMENT + FROM information_schema.COLUMNS + WHERE TABLE_SCHEMA = %s + AND TABLE_NAME = %s + AND DATA_TYPE = 'varchar' + AND COLUMN_COMMENT LIKE '%%:filepath%%' + """ + columns = connection.query( + columns_query, args=(schema.database, table_name) + ).fetchall() + + for column_name, column_type, comment in columns: + comment = comment or "" + match = FILEPATH_PATTERN.search(comment) + if match: + store_name = match.group(1) or "external" + results.append({ + "table_name": table_name, + "column_name": column_name, + "column_type": column_type, + "comment": comment, + "store_name": store_name, + }) + + return results + + +def migrate_external( + schema: Schema, + dry_run: bool = True, + finalize: bool = False, +) -> dict: + """ + Migrate external storage columns from 0.x to 2.0 format. + + This migration uses a safe, multi-step approach: + + 1. **Initial run** (dry_run=False): Adds new `_v2` columns with JSON + type and copies data from the old columns, converting UUID references to + JSON metadata. + + 2. **Verification**: You verify all data is accessible via DataJoint 2.0. + + 3. **Finalize** (finalize=True): Renames columns (old → `_v1`, new → original + name) and optionally drops the old columns. 
+ + This allows 0.x and 2.0 to coexist during migration and provides a rollback + path if issues are discovered. + + Parameters + ---------- + schema : Schema + The DataJoint schema to migrate. + dry_run : bool, optional + If True, only preview changes without applying. Default True. + finalize : bool, optional + If True, rename migrated columns to original names and drop old columns. + Only run after verifying migration succeeded. Default False. + + Returns + ------- + dict + Migration results with keys: + + - columns_found: Number of external columns found + - columns_migrated: Number of columns processed + - rows_migrated: Number of rows with data converted + - details: Per-column migration details + + Examples + -------- + >>> from datajoint.migration import migrate_external + >>> + >>> # Step 1: Preview + >>> result = migrate_external(schema, dry_run=True) + >>> print(f"Found {result['columns_found']} columns to migrate") + >>> + >>> # Step 2: Run migration (adds new columns) + >>> result = migrate_external(schema, dry_run=False) + >>> print(f"Migrated {result['rows_migrated']} rows") + >>> + >>> # Step 3: Verify data is accessible via DataJoint 2.0 + >>> # ... manual verification ... + >>> + >>> # Step 4: Finalize (rename columns, drop old) + >>> result = migrate_external(schema, finalize=True) + + Notes + ----- + The migration reads from the hidden `~external_` tables to build + JSON metadata. Ensure store configuration in datajoint.json matches the + paths stored in these tables. + """ + columns = _find_external_columns(schema) + connection = schema.connection + database = schema.database + + result = { + "columns_found": len(columns), + "columns_migrated": 0, + "rows_migrated": 0, + "details": [], + } + + if not columns: + logger.info(f"No external columns found in {database}") + return result + + for col in columns: + table_name = col["table_name"] + column_name = col["column_name"] + store_name = col["store_name"] + external_type = col["external_type"] + old_comment = col["comment"] + + detail = { + "table": f"{database}.{table_name}", + "column": column_name, + "store": store_name, + "type": external_type, + "status": "pending", + "rows": 0, + } + + # Build new comment + codec = "blob" if external_type == "blob" else "attach" + # Remove old :external...: pattern from comment + new_comment = EXTERNAL_PATTERNS[external_type].sub("", old_comment).strip() + new_comment = f":{codec}@{store_name}: {new_comment}".strip() + + new_column = f"{column_name}_v2" + + if finalize: + # Finalize: rename columns + detail["action"] = "finalize" + + if dry_run: + logger.info( + f"Would finalize {database}.{table_name}.{column_name}: " + f"rename {column_name} → {column_name}_v1, " + f"{new_column} → {column_name}" + ) + detail["status"] = "dry_run" + else: + try: + # Rename old column to _v1 + sql = ( + f"ALTER TABLE `{database}`.`{table_name}` " + f"CHANGE COLUMN `{column_name}` `{column_name}_v1` " + f"{col['column_type']} COMMENT 'legacy 0.x'" + ) + connection.query(sql) + + # Rename new column to original name + sql = ( + f"ALTER TABLE `{database}`.`{table_name}` " + f"CHANGE COLUMN `{new_column}` `{column_name}` " + f"JSON COMMENT '{new_comment}'" + ) + connection.query(sql) + + detail["status"] = "finalized" + result["columns_migrated"] += 1 + logger.info(f"Finalized {database}.{table_name}.{column_name}") + except Exception as e: + detail["status"] = "error" + detail["error"] = str(e) + logger.error(f"Failed to finalize {table_name}.{column_name}: {e}") + raise DataJointError(f"Finalize 
failed: {e}") from e + else: + # Initial migration: add new column and copy data + detail["action"] = "migrate" + + # Check if _v2 column already exists + existing = connection.query( + """ + SELECT COLUMN_NAME FROM information_schema.COLUMNS + WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s + """, + args=(database, table_name, new_column), + ).fetchone() + + if existing: + detail["status"] = "already_migrated" + logger.info(f"Column {new_column} already exists, skipping") + result["details"].append(detail) + continue + + if dry_run: + # Count rows that would be migrated + count_sql = f""" + SELECT COUNT(*) FROM `{database}`.`{table_name}` + WHERE `{column_name}` IS NOT NULL + """ + count = connection.query(count_sql).fetchone()[0] + detail["rows"] = count + detail["status"] = "dry_run" + logger.info( + f"Would migrate {database}.{table_name}.{column_name}: " + f"{count} rows, store={store_name}" + ) + else: + try: + # Add new JSON column + sql = ( + f"ALTER TABLE `{database}`.`{table_name}` " + f"ADD COLUMN `{new_column}` JSON " + f"COMMENT '{new_comment}'" + ) + connection.query(sql) + + # Copy and convert data from old column + # Query the external table for metadata + external_table = f"~external_{store_name}" + + # Get store config for URL building + from .settings import config + store_config = config.get("stores", {}).get(store_name, {}) + protocol = store_config.get("protocol", "file") + location = store_config.get("location", "") + + # Update rows with JSON metadata + update_sql = f""" + UPDATE `{database}`.`{table_name}` t + JOIN `{database}`.`{external_table}` e + ON t.`{column_name}` = e.hash + SET t.`{new_column}` = JSON_OBJECT( + 'url', CONCAT('{protocol}://', '{location}/', e.filepath), + 'size', e.size, + 'hash', HEX(e.hash) + ) + WHERE t.`{column_name}` IS NOT NULL + """ + connection.query(update_sql) + + # Count migrated rows + count_sql = f""" + SELECT COUNT(*) FROM `{database}`.`{table_name}` + WHERE `{new_column}` IS NOT NULL + """ + count = connection.query(count_sql).fetchone()[0] + detail["rows"] = count + detail["status"] = "migrated" + result["columns_migrated"] += 1 + result["rows_migrated"] += count + + logger.info( + f"Migrated {database}.{table_name}.{column_name}: " + f"{count} rows" + ) + except Exception as e: + detail["status"] = "error" + detail["error"] = str(e) + logger.error( + f"Failed to migrate {table_name}.{column_name}: {e}" + ) + raise DataJointError(f"Migration failed: {e}") from e + + result["details"].append(detail) + + return result + + +def migrate_filepath( + schema: Schema, + dry_run: bool = True, + finalize: bool = False, +) -> dict: + """ + Migrate filepath columns from 0.x to 2.0 format. + + Same multi-step approach as migrate_external: + + 1. **Initial run**: Adds new `_v2` columns with JSON type + 2. **Verification**: Verify files accessible via DataJoint 2.0 + 3. **Finalize**: Rename columns and drop old + + Parameters + ---------- + schema : Schema + The DataJoint schema to migrate. + dry_run : bool, optional + If True, only preview changes. Default True. + finalize : bool, optional + If True, finalize migration. Default False. + + Returns + ------- + dict + Migration results (same format as migrate_external). 
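+        For example (illustrative counts)::
+
+            {"columns_found": 2, "columns_migrated": 2,
+             "rows_migrated": 150, "details": [...]}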
+ + Examples + -------- + >>> from datajoint.migration import migrate_filepath + >>> + >>> # Preview + >>> result = migrate_filepath(schema, dry_run=True) + >>> + >>> # Run migration + >>> result = migrate_filepath(schema, dry_run=False) + >>> + >>> # Finalize after verification + >>> result = migrate_filepath(schema, finalize=True) + """ + columns = _find_filepath_columns(schema) + connection = schema.connection + database = schema.database + + result = { + "columns_found": len(columns), + "columns_migrated": 0, + "rows_migrated": 0, + "details": [], + } + + if not columns: + logger.info(f"No filepath columns found in {database}") + return result + + for col in columns: + table_name = col["table_name"] + column_name = col["column_name"] + store_name = col["store_name"] + old_comment = col["comment"] + + detail = { + "table": f"{database}.{table_name}", + "column": column_name, + "store": store_name, + "status": "pending", + "rows": 0, + } + + # Build new comment + new_comment = FILEPATH_PATTERN.sub("", old_comment).strip() + new_comment = f":filepath@{store_name}: {new_comment}".strip() + + new_column = f"{column_name}_v2" + + if finalize: + detail["action"] = "finalize" + + if dry_run: + logger.info( + f"Would finalize {database}.{table_name}.{column_name}" + ) + detail["status"] = "dry_run" + else: + try: + # Rename old column to _v1 + sql = ( + f"ALTER TABLE `{database}`.`{table_name}` " + f"CHANGE COLUMN `{column_name}` `{column_name}_v1` " + f"{col['column_type']} COMMENT 'legacy 0.x'" + ) + connection.query(sql) + + # Rename new column to original name + sql = ( + f"ALTER TABLE `{database}`.`{table_name}` " + f"CHANGE COLUMN `{new_column}` `{column_name}` " + f"JSON COMMENT '{new_comment}'" + ) + connection.query(sql) + + detail["status"] = "finalized" + result["columns_migrated"] += 1 + logger.info(f"Finalized {database}.{table_name}.{column_name}") + except Exception as e: + detail["status"] = "error" + detail["error"] = str(e) + logger.error(f"Failed to finalize: {e}") + raise DataJointError(f"Finalize failed: {e}") from e + else: + detail["action"] = "migrate" + + # Check if _v2 column already exists + existing = connection.query( + """ + SELECT COLUMN_NAME FROM information_schema.COLUMNS + WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s AND COLUMN_NAME = %s + """, + args=(database, table_name, new_column), + ).fetchone() + + if existing: + detail["status"] = "already_migrated" + result["details"].append(detail) + continue + + if dry_run: + count_sql = f""" + SELECT COUNT(*) FROM `{database}`.`{table_name}` + WHERE `{column_name}` IS NOT NULL + """ + count = connection.query(count_sql).fetchone()[0] + detail["rows"] = count + detail["status"] = "dry_run" + logger.info( + f"Would migrate {database}.{table_name}.{column_name}: " + f"{count} rows" + ) + else: + try: + # Get store config + from .settings import config + store_config = config.get("stores", {}).get(store_name, {}) + protocol = store_config.get("protocol", "file") + location = store_config.get("location", "") + + # Add new JSON column + sql = ( + f"ALTER TABLE `{database}`.`{table_name}` " + f"ADD COLUMN `{new_column}` JSON " + f"COMMENT '{new_comment}'" + ) + connection.query(sql) + + # Convert filepath to JSON with URL + update_sql = f""" + UPDATE `{database}`.`{table_name}` + SET `{new_column}` = JSON_OBJECT( + 'url', CONCAT('{protocol}://', '{location}/', `{column_name}`) + ) + WHERE `{column_name}` IS NOT NULL + """ + connection.query(update_sql) + + count_sql = f""" + SELECT COUNT(*) FROM `{database}`.`{table_name}` 
+ WHERE `{new_column}` IS NOT NULL + """ + count = connection.query(count_sql).fetchone()[0] + detail["rows"] = count + detail["status"] = "migrated" + result["columns_migrated"] += 1 + result["rows_migrated"] += count + + logger.info( + f"Migrated {database}.{table_name}.{column_name}: " + f"{count} rows" + ) + except Exception as e: + detail["status"] = "error" + detail["error"] = str(e) + logger.error(f"Failed to migrate: {e}") + raise DataJointError(f"Migration failed: {e}") from e + + result["details"].append(detail) + + return result From 08d5c6aaf2ac707538917e6c6019ac6efcc336b7 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 12 Jan 2026 16:03:13 -0600 Subject: [PATCH 02/10] feat: Add NpyCodec for lazy-loading numpy arrays Implement the `` codec for schema-addressed numpy array storage: - Add SchemaCodec base class for path-addressed storage codecs - Add NpyRef class for lazy array references with metadata - Add NpyCodec using .npy format with shape/dtype inspection - Refactor ObjectCodec to inherit from SchemaCodec - Rename is_external to is_store throughout codebase - Export SchemaCodec and NpyRef from public API - Bump version to 2.0.0a17 Key features: - Lazy loading: inspect shape/dtype without downloading - NumPy integration via __array__ protocol - Safe bulk fetch: returns NpyRef objects, not arrays - Schema-addressed paths: {schema}/{table}/{pk}/{attr}.npy Co-Authored-By: Claude Opus 4.5 --- src/datajoint/__init__.py | 9 +- src/datajoint/builtin_codecs.py | 570 ++++++++++++++++++++++++--- src/datajoint/codecs.py | 18 +- src/datajoint/declare.py | 4 +- src/datajoint/heading.py | 6 +- src/datajoint/migrate.py | 91 ++--- src/datajoint/schemas.py | 30 +- src/datajoint/version.py | 2 +- tests/integration/test_codecs.py | 4 +- tests/integration/test_npy_codec.py | 439 +++++++++++++++++++++ tests/integration/test_privileges.py | 4 +- tests/integration/test_schema.py | 12 +- tests/schema_codecs.py | 4 +- tests/unit/test_codecs.py | 4 +- 14 files changed, 1048 insertions(+), 149 deletions(-) create mode 100644 tests/integration/test_npy_codec.py diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index ae8d308d2..3a049e110 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -47,8 +47,12 @@ "MatStruct", # Codec API "Codec", + "SchemaCodec", "list_codecs", "get_codec", + "ObjectRef", + "NpyRef", + # Other "errors", "migrate", "DataJointError", @@ -56,7 +60,6 @@ "key_hash", "logger", "cli", - "ObjectRef", "ValidationResult", ] @@ -70,6 +73,10 @@ get_codec, list_codecs, ) +from .builtin_codecs import ( + SchemaCodec, + NpyRef, +) from .blob import MatCell, MatStruct from .connection import Connection, conn from .errors import DataJointError diff --git a/src/datajoint/builtin_codecs.py b/src/datajoint/builtin_codecs.py index 66589dc36..499fec846 100644 --- a/src/datajoint/builtin_codecs.py +++ b/src/datajoint/builtin_codecs.py @@ -11,6 +11,7 @@ - ````: Path-addressed storage for files/folders (Zarr, HDF5) - ````: File attachment (internal) or external with dedup - ````: Reference to existing file in store + - ````: Store numpy arrays as portable .npy files (external only) Example - Creating a Custom Codec: Here's how to define your own codec, modeled after the built-in codecs:: @@ -23,7 +24,7 @@ class GraphCodec(dj.Codec): name = "graph" # Use as in definitions - def get_dtype(self, is_external: bool) -> str: + def get_dtype(self, is_store: bool) -> str: return "" # Compose with blob for serialization def encode(self, graph, *, key=None, 
store_name=None): @@ -102,9 +103,9 @@ class ProcessedData(dj.Manual): name = "blob" - def get_dtype(self, is_external: bool) -> str: + def get_dtype(self, is_store: bool) -> str: """Return bytes for internal, for external storage.""" - return "" if is_external else "bytes" + return "" if is_store else "bytes" def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> bytes: """Serialize a Python object to DataJoint's blob format.""" @@ -157,9 +158,9 @@ class RawContent(dj.Manual): name = "hash" - def get_dtype(self, is_external: bool) -> str: + def get_dtype(self, is_store: bool) -> str: """Hash storage is external only.""" - if not is_external: + if not is_store: raise DataJointError(" requires @ (external storage only)") return "json" @@ -212,26 +213,186 @@ def validate(self, value: Any) -> None: # ============================================================================= -# Path-Addressed Storage Codec (OAS - Object-Augmented Schema) +# Schema-Addressed Storage Base Class # ============================================================================= -class ObjectCodec(Codec): +class SchemaCodec(Codec, register=False): """ - Path-addressed storage for files and folders. + Abstract base class for schema-addressed codecs. - The ```` codec provides managed file/folder storage where the path - is derived from the primary key: ``{schema}/{table}/{pk}/{field}/`` + Schema-addressed storage is an OAS (Object-Augmented Schema) addressing + scheme where paths mirror the database schema structure: + ``{schema}/{table}/{pk}/{attribute}``. This creates a browsable + organization in object storage that reflects the schema design. - Unlike ```` (hash-addressed), each row has its own storage path, - and content is deleted when the row is deleted. This is ideal for: + Subclasses must implement: + - ``name``: Codec name for ```` syntax + - ``encode()``: Serialize and upload content + - ``decode()``: Create lazy reference from metadata + - ``validate()``: Validate input values + + Helper Methods: + - ``_extract_context()``: Parse key dict into schema/table/field/pk + - ``_build_path()``: Construct storage path from context + - ``_get_backend()``: Get storage backend by name + + Comparison with Hash-addressed: + - **Schema-addressed** (this): Path from schema structure, no dedup + - **Hash-addressed**: Path from content hash, automatic dedup + + Example:: + + class MyCodec(SchemaCodec): + name = "my" + + def encode(self, value, *, key=None, store_name=None): + schema, table, field, pk = self._extract_context(key) + path, _ = self._build_path(schema, table, field, pk, ext=".dat") + backend = self._get_backend(store_name) + backend.put_buffer(serialize(value), path) + return {"path": path, "store": store_name, ...} + + def decode(self, stored, *, key=None): + backend = self._get_backend(stored.get("store")) + return MyRef(stored, backend) + + See Also + -------- + HashCodec : Hash-addressed storage with content deduplication. + ObjectCodec : Schema-addressed storage for files/folders. + NpyCodec : Schema-addressed storage for numpy arrays. + """ + + def get_dtype(self, is_store: bool) -> str: + """ + Return storage dtype. Schema-addressed codecs require @ modifier. + + Parameters + ---------- + is_store : bool + Must be True for schema-addressed codecs. + + Returns + ------- + str + "json" for metadata storage. + + Raises + ------ + DataJointError + If is_store is False (@ modifier missing). 
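+
+        Examples
+        --------
+        Schema-addressed codecs always map to a JSON column when a store
+        is given::
+
+            >>> NpyCodec().get_dtype(is_store=True)
+            'json'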
+ """ + if not is_store: + raise DataJointError(f"<{self.name}> requires @ (store only)") + return "json" + + def _extract_context(self, key: dict | None) -> tuple[str, str, str, dict]: + """ + Extract schema, table, field, and primary key from context dict. + + Parameters + ---------- + key : dict or None + Context dict with ``_schema``, ``_table``, ``_field``, + and primary key values. + + Returns + ------- + tuple[str, str, str, dict] + ``(schema, table, field, primary_key)`` + """ + key = dict(key) if key else {} + schema = key.pop("_schema", "unknown") + table = key.pop("_table", "unknown") + field = key.pop("_field", "data") + primary_key = {k: v for k, v in key.items() if not k.startswith("_")} + return schema, table, field, primary_key + + def _build_path( + self, + schema: str, + table: str, + field: str, + primary_key: dict, + ext: str | None = None, + ) -> tuple[str, str]: + """ + Build schema-addressed storage path. + + Constructs a path that mirrors the database schema structure: + ``{schema}/{table}/{pk_values}/{field}{ext}`` + + Parameters + ---------- + schema : str + Schema name. + table : str + Table name. + field : str + Field/attribute name. + primary_key : dict + Primary key values. + ext : str, optional + File extension (e.g., ".npy", ".zarr"). + + Returns + ------- + tuple[str, str] + ``(path, token)`` where path is the storage path and token + is a unique identifier. + """ + from .storage import build_object_path + + return build_object_path( + schema=schema, + table=table, + field=field, + primary_key=primary_key, + ext=ext, + ) + + def _get_backend(self, store_name: str | None = None): + """ + Get storage backend by name. + + Parameters + ---------- + store_name : str, optional + Store name. If None, returns default store. + + Returns + ------- + StorageBackend + Storage backend instance. + """ + from .content_registry import get_store_backend + + return get_store_backend(store_name) + + +# ============================================================================= +# Object Codec (Schema-Addressed Files/Folders) +# ============================================================================= + + +class ObjectCodec(SchemaCodec): + """ + Schema-addressed storage for files and folders. + + The ```` codec provides managed file/folder storage using + schema-addressed paths: ``{schema}/{table}/{pk}/{field}/``. This creates + a browsable organization in object storage that mirrors the database schema. + + Unlike hash-addressed storage (````), each row has its own path + and content is deleted when the row is deleted. Ideal for: - Zarr arrays (hierarchical chunked data) - HDF5 files - Complex multi-file outputs - Any content that shouldn't be deduplicated - External only - requires @ modifier. + Store only - requires @ modifier. 
Example:: @@ -258,24 +419,24 @@ def make(self, key): {store_root}/{schema}/{table}/{pk}/{field}/ - Comparison with ````:: + Comparison with hash-addressed:: - | Aspect | | | - |----------------|-------------------|---------------------| - | Addressing | Path (by PK) | Hash (by content) | - | Deduplication | No | Yes | - | Deletion | With row | GC when unreferenced| - | Use case | Zarr, HDF5 | Blobs, attachments | + | Aspect | | | + |----------------|---------------------|---------------------| + | Addressing | Schema-addressed | Hash-addressed | + | Deduplication | No | Yes | + | Deletion | With row | GC when unreferenced| + | Use case | Zarr, HDF5 | Blobs, attachments | + + See Also + -------- + SchemaCodec : Base class for schema-addressed codecs. + NpyCodec : Schema-addressed storage for numpy arrays. + HashCodec : Hash-addressed storage with deduplication. """ name = "object" - def get_dtype(self, is_external: bool) -> str: - """Object storage is external only.""" - if not is_external: - raise DataJointError(" requires @ (external storage only)") - return "json" - def encode( self, value: Any, @@ -304,15 +465,8 @@ def encode( from datetime import datetime, timezone from pathlib import Path - from .content_registry import get_store_backend - from .storage import build_object_path - - # Extract context from key - key = key or {} - schema = key.pop("_schema", "unknown") - table = key.pop("_table", "unknown") - field = key.pop("_field", "data") - primary_key = {k: v for k, v in key.items() if not k.startswith("_")} + # Extract context using inherited helper + schema, table, field, primary_key = self._extract_context(key) # Check for pre-computed metadata (from staged insert) if isinstance(value, dict) and "path" in value: @@ -353,17 +507,11 @@ def encode( else: raise TypeError(f" expects bytes or path, got {type(value).__name__}") - # Build storage path - path, token = build_object_path( - schema=schema, - table=table, - field=field, - primary_key=primary_key, - ext=ext, - ) + # Build storage path using inherited helper + path, token = self._build_path(schema, table, field, primary_key, ext=ext) - # Get storage backend - backend = get_store_backend(store_name) + # Get storage backend using inherited helper + backend = self._get_backend(store_name) # Upload content if is_dir: @@ -406,10 +554,8 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any: Handle for accessing the stored content. 
""" from .objectref import ObjectRef - from .content_registry import get_store_backend - store_name = stored.get("store") - backend = get_store_backend(store_name) + backend = self._get_backend(stored.get("store")) return ObjectRef.from_json(stored, backend=backend) def validate(self, value: Any) -> None: @@ -472,9 +618,9 @@ class Documents(dj.Manual): name = "attach" - def get_dtype(self, is_external: bool) -> str: + def get_dtype(self, is_store: bool) -> str: """Return bytes for internal, for external storage.""" - return "" if is_external else "bytes" + return "" if is_store else "bytes" def encode(self, value: Any, *, key: dict | None = None, store_name: str | None = None) -> bytes: """ @@ -610,9 +756,9 @@ class Recordings(dj.Manual): name = "filepath" - def get_dtype(self, is_external: bool) -> str: + def get_dtype(self, is_store: bool) -> str: """Filepath is external only.""" - if not is_external: + if not is_store: raise DataJointError(" requires @store") return "json" @@ -688,3 +834,323 @@ def validate(self, value: Any) -> None: if not isinstance(value, (str, Path)): raise TypeError(f" expects a path string or Path, got {type(value).__name__}") + + +# ============================================================================= +# NumPy Array Codec (.npy format) +# ============================================================================= + + +class NpyRef: + """ + Lazy reference to a numpy array stored as a .npy file. + + This class provides metadata access without I/O and transparent + integration with numpy operations via the ``__array__`` protocol. + + Attributes + ---------- + shape : tuple[int, ...] + Array shape (from metadata, no I/O). + dtype : numpy.dtype + Array dtype (from metadata, no I/O). + path : str + Storage path within the store. + store : str or None + Store name (None for default). + + Examples + -------- + Metadata access without download:: + + ref = (Recording & key).fetch1('waveform') + print(ref.shape) # (1000, 32) - no download + print(ref.dtype) # float64 - no download + + Explicit loading:: + + arr = ref.load() # Downloads and returns np.ndarray + + Transparent numpy integration:: + + # These all trigger automatic download via __array__ + result = ref + 1 + result = np.mean(ref) + result = ref[0:100] # Slicing works too + """ + + __slots__ = ("_meta", "_backend", "_cached") + + def __init__(self, metadata: dict, backend: Any): + """ + Initialize NpyRef from metadata and storage backend. + + Parameters + ---------- + metadata : dict + JSON metadata containing path, store, dtype, shape. + backend : StorageBackend + Storage backend for file operations. 
+ """ + self._meta = metadata + self._backend = backend + self._cached = None + + @property + def shape(self) -> tuple: + """Array shape (no I/O required).""" + return tuple(self._meta["shape"]) + + @property + def dtype(self): + """Array dtype (no I/O required).""" + import numpy as np + + return np.dtype(self._meta["dtype"]) + + @property + def ndim(self) -> int: + """Number of dimensions (no I/O required).""" + return len(self._meta["shape"]) + + @property + def size(self) -> int: + """Total number of elements (no I/O required).""" + import math + + return math.prod(self._meta["shape"]) + + @property + def nbytes(self) -> int: + """Total bytes (estimated from shape and dtype, no I/O required).""" + return self.size * self.dtype.itemsize + + @property + def path(self) -> str: + """Storage path within the store.""" + return self._meta["path"] + + @property + def store(self) -> str | None: + """Store name (None for default store).""" + return self._meta.get("store") + + @property + def is_loaded(self) -> bool: + """True if array data has been downloaded and cached.""" + return self._cached is not None + + def load(self): + """ + Download and return the array. + + Returns + ------- + numpy.ndarray + The array data. + + Notes + ----- + The array is cached after first load. Subsequent calls return + the cached copy without additional I/O. + """ + import io + + import numpy as np + + if self._cached is None: + buffer = self._backend.get_buffer(self.path) + self._cached = np.load(io.BytesIO(buffer), allow_pickle=False) + return self._cached + + def __array__(self, dtype=None): + """ + NumPy array protocol for transparent integration. + + This method is called automatically when the NpyRef is used + in numpy operations (arithmetic, ufuncs, etc.). + + Parameters + ---------- + dtype : numpy.dtype, optional + Desired output dtype. + + Returns + ------- + numpy.ndarray + The array data, optionally cast to dtype. + """ + arr = self.load() + if dtype is not None: + return arr.astype(dtype) + return arr + + def __getitem__(self, key): + """Support indexing/slicing by loading then indexing.""" + return self.load()[key] + + def __len__(self) -> int: + """Length of first dimension.""" + if not self._meta["shape"]: + raise TypeError("len() of 0-dimensional array") + return self._meta["shape"][0] + + def __repr__(self) -> str: + status = "loaded" if self.is_loaded else "not loaded" + return f"NpyRef(shape={self.shape}, dtype={self.dtype}, {status})" + + def __str__(self) -> str: + return repr(self) + + +class NpyCodec(SchemaCodec): + """ + Schema-addressed storage for numpy arrays as .npy files. + + The ```` codec stores numpy arrays as standard ``.npy`` files + using schema-addressed paths: ``{schema}/{table}/{pk}/{attribute}.npy``. + Arrays are fetched lazily via ``NpyRef``, which provides metadata access + without I/O and transparent numpy integration via ``__array__``. + + Store only - requires ``@`` modifier. + + Key Features: + - **Portable**: Standard .npy format readable by numpy, MATLAB, etc. 
+ - **Lazy loading**: Metadata (shape, dtype) available without download + - **Transparent**: Use in numpy operations triggers automatic download + - **Safe bulk fetch**: Fetching many rows doesn't download until needed + - **Schema-addressed**: Browsable paths that mirror database structure + + Example:: + + @schema + class Recording(dj.Manual): + definition = ''' + recording_id : int + --- + waveform : # default store + spectrogram : # specific store + ''' + + # Insert - just pass the array + Recording.insert1({ + 'recording_id': 1, + 'waveform': np.random.randn(1000, 32), + }) + + # Fetch - returns NpyRef (lazy) + ref = (Recording & 'recording_id=1').fetch1('waveform') + ref.shape # (1000, 32) - no download + ref.dtype # float64 - no download + + # Use in numpy ops - downloads automatically + result = np.mean(ref, axis=0) + + # Or load explicitly + arr = ref.load() + + Storage Details: + - File format: NumPy .npy (version 1.0 or 2.0) + - Path: ``{schema}/{table}/{pk}/{attribute}.npy`` + - Database column: JSON with ``{path, store, dtype, shape}`` + + See Also + -------- + NpyRef : The lazy array reference returned on fetch. + SchemaCodec : Base class for schema-addressed codecs. + ObjectCodec : Schema-addressed storage for files/folders. + """ + + name = "npy" + + def validate(self, value: Any) -> None: + """ + Validate that value is a numpy array suitable for .npy storage. + + Parameters + ---------- + value : Any + Value to validate. + + Raises + ------ + DataJointError + If value is not a numpy array or has object dtype. + """ + import numpy as np + + if not isinstance(value, np.ndarray): + raise DataJointError(f" requires numpy.ndarray, got {type(value).__name__}") + if value.dtype == object: + raise DataJointError(" does not support object dtype arrays") + + def encode( + self, + value: Any, + *, + key: dict | None = None, + store_name: str | None = None, + ) -> dict: + """ + Serialize array to .npy and upload to storage. + + Parameters + ---------- + value : numpy.ndarray + Array to store. + key : dict, optional + Context dict with ``_schema``, ``_table``, ``_field``, + and primary key values for path construction. + store_name : str, optional + Target store. If None, uses default store. + + Returns + ------- + dict + JSON metadata: ``{path, store, dtype, shape}``. + """ + import io + + import numpy as np + + # Extract context using inherited helper + schema, table, field, primary_key = self._extract_context(key) + + # Build schema-addressed storage path + path, _ = self._build_path(schema, table, field, primary_key, ext=".npy") + + # Serialize to .npy format + buffer = io.BytesIO() + np.save(buffer, value, allow_pickle=False) + npy_bytes = buffer.getvalue() + + # Upload to storage using inherited helper + backend = self._get_backend(store_name) + backend.put_buffer(npy_bytes, path) + + # Return metadata (includes numpy-specific shape/dtype) + return { + "path": path, + "store": store_name, + "dtype": str(value.dtype), + "shape": list(value.shape), + } + + def decode(self, stored: dict, *, key: dict | None = None) -> NpyRef: + """ + Create lazy NpyRef from stored metadata. + + Parameters + ---------- + stored : dict + JSON metadata from database. + key : dict, optional + Primary key values (unused). + + Returns + ------- + NpyRef + Lazy array reference with metadata access and numpy integration. 
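+
+        ``stored`` is the JSON document written by ``encode()``, for example
+        (illustrative values)::
+
+            {"path": "lab/recording/recording_id=1/waveform.npy",
+             "store": None, "dtype": "float64", "shape": [1000, 32]}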
+ """ + backend = self._get_backend(stored.get("store")) + return NpyRef(stored, backend) diff --git a/src/datajoint/codecs.py b/src/datajoint/codecs.py index 211308d1c..e6ab22931 100644 --- a/src/datajoint/codecs.py +++ b/src/datajoint/codecs.py @@ -11,7 +11,7 @@ class GraphCodec(dj.Codec): name = "graph" - def get_dtype(self, is_external: bool) -> str: + def get_dtype(self, is_store: bool) -> str: return "" def encode(self, graph, *, key=None, store_name=None): @@ -64,7 +64,7 @@ class Codec(ABC): >>> class GraphCodec(dj.Codec): ... name = "graph" ... - ... def get_dtype(self, is_external: bool) -> str: + ... def get_dtype(self, is_store: bool) -> str: ... return "" ... ... def encode(self, graph, *, key=None, store_name=None): @@ -120,14 +120,14 @@ def __init_subclass__(cls, *, register: bool = True, **kwargs): logger.debug(f"Registered codec <{cls.name}> from {cls.__module__}.{cls.__name__}") @abstractmethod - def get_dtype(self, is_external: bool) -> str: + def get_dtype(self, is_store: bool) -> str: """ Return the storage dtype for this codec. Parameters ---------- - is_external : bool - True if ``@`` modifier present (external storage). + is_store : bool + True if ``@`` modifier present (object store vs inline). Returns ------- @@ -138,7 +138,7 @@ def get_dtype(self, is_external: bool) -> str: Raises ------ DataJointError - If external storage not supported but requested. + If store mode not supported but requested. """ ... @@ -450,11 +450,11 @@ def resolve_dtype( codec = get_codec(type_name) chain.append(codec) - # Determine if external based on whether @ is present - is_external = effective_store is not None + # Determine if store mode based on whether @ is present + is_store = effective_store is not None # Get the inner dtype from the codec - inner_dtype = codec.get_dtype(is_external) + inner_dtype = codec.get_dtype(is_store) # Recursively resolve the inner dtype, propagating store final_dtype, inner_chain, resolved_store = resolve_dtype(inner_dtype, seen, effective_store) diff --git a/src/datajoint/declare.py b/src/datajoint/declare.py index d86e90ed9..c96dc6a84 100644 --- a/src/datajoint/declare.py +++ b/src/datajoint/declare.py @@ -651,8 +651,8 @@ def substitute_special_type(match: dict, category: str, foreign_key_sql: list[st if store_name is not None: match["store"] = store_name # Determine if external storage is used (store_name is present, even if empty string for default) - is_external = store_name is not None - inner_dtype = codec.get_dtype(is_external=is_external) + is_store = store_name is not None + inner_dtype = codec.get_dtype(is_store=is_store) # If inner dtype is a codec without store, propagate the store from outer type # e.g., returns , we need to resolve as diff --git a/src/datajoint/heading.py b/src/datajoint/heading.py index 96383170b..c2ca497fc 100644 --- a/src/datajoint/heading.py +++ b/src/datajoint/heading.py @@ -39,7 +39,7 @@ def __init__(self, codec_name: str): def name(self) -> str: return self._codec_name - def get_dtype(self, is_external: bool) -> str: + def get_dtype(self, is_store: bool) -> str: raise DataJointError( f"Codec <{self._codec_name}> is not registered. Define a Codec subclass with name='{self._codec_name}'." 
) @@ -450,8 +450,8 @@ def _init_from_database(self) -> None: attr["codec"] = _MissingType(codec_spec) else: # Determine if external storage based on store presence - is_external = attr.get("store") is not None - attr["type"] = attr["codec"].get_dtype(is_external=is_external) + is_store = attr.get("store") is not None + attr["type"] = attr["codec"].get_dtype(is_store=is_store) if not any(r.match(attr["type"]) for r in TYPE_PATTERN.values()): raise DataJointError(f"Invalid dtype '{attr['type']}' in codec <{codec_spec}>.") # Update is_blob based on resolved dtype (check both BYTES and NATIVE_BLOB patterns) diff --git a/src/datajoint/migrate.py b/src/datajoint/migrate.py index b72896d0a..f0e1371ad 100644 --- a/src/datajoint/migrate.py +++ b/src/datajoint/migrate.py @@ -8,7 +8,6 @@ from __future__ import annotations -import json import logging import re from typing import TYPE_CHECKING @@ -495,9 +494,7 @@ def _find_external_columns(schema: Schema) -> list[dict]: AND DATA_TYPE = 'binary' AND CHARACTER_MAXIMUM_LENGTH = 16 """ - columns = connection.query( - columns_query, args=(schema.database, table_name) - ).fetchall() + columns = connection.query(columns_query, args=(schema.database, table_name)).fetchall() for column_name, column_type, comment in columns: comment = comment or "" @@ -506,28 +503,32 @@ def _find_external_columns(schema: Schema) -> list[dict]: blob_match = EXTERNAL_PATTERNS["blob"].search(comment) if blob_match: store_name = blob_match.group(1) or "external" - results.append({ - "table_name": table_name, - "column_name": column_name, - "column_type": column_type, - "comment": comment, - "store_name": store_name, - "external_type": "blob", - }) + results.append( + { + "table_name": table_name, + "column_name": column_name, + "column_type": column_type, + "comment": comment, + "store_name": store_name, + "external_type": "blob", + } + ) continue # Check for external attach pattern attach_match = EXTERNAL_PATTERNS["attach"].search(comment) if attach_match: store_name = attach_match.group(1) or "external" - results.append({ - "table_name": table_name, - "column_name": column_name, - "column_type": column_type, - "comment": comment, - "store_name": store_name, - "external_type": "attach", - }) + results.append( + { + "table_name": table_name, + "column_name": column_name, + "column_type": column_type, + "comment": comment, + "store_name": store_name, + "external_type": "attach", + } + ) return results @@ -561,22 +562,22 @@ def _find_filepath_columns(schema: Schema) -> list[dict]: AND DATA_TYPE = 'varchar' AND COLUMN_COMMENT LIKE '%%:filepath%%' """ - columns = connection.query( - columns_query, args=(schema.database, table_name) - ).fetchall() + columns = connection.query(columns_query, args=(schema.database, table_name)).fetchall() for column_name, column_type, comment in columns: comment = comment or "" match = FILEPATH_PATTERN.search(comment) if match: store_name = match.group(1) or "external" - results.append({ - "table_name": table_name, - "column_name": column_name, - "column_type": column_type, - "comment": comment, - "store_name": store_name, - }) + results.append( + { + "table_name": table_name, + "column_name": column_name, + "column_type": column_type, + "comment": comment, + "store_name": store_name, + } + ) return results @@ -751,10 +752,7 @@ def migrate_external( count = connection.query(count_sql).fetchone()[0] detail["rows"] = count detail["status"] = "dry_run" - logger.info( - f"Would migrate {database}.{table_name}.{column_name}: " - f"{count} rows, 
store={store_name}" - ) + logger.info(f"Would migrate {database}.{table_name}.{column_name}: " f"{count} rows, store={store_name}") else: try: # Add new JSON column @@ -771,6 +769,7 @@ def migrate_external( # Get store config for URL building from .settings import config + store_config = config.get("stores", {}).get(store_name, {}) protocol = store_config.get("protocol", "file") location = store_config.get("location", "") @@ -800,16 +799,11 @@ def migrate_external( result["columns_migrated"] += 1 result["rows_migrated"] += count - logger.info( - f"Migrated {database}.{table_name}.{column_name}: " - f"{count} rows" - ) + logger.info(f"Migrated {database}.{table_name}.{column_name}: " f"{count} rows") except Exception as e: detail["status"] = "error" detail["error"] = str(e) - logger.error( - f"Failed to migrate {table_name}.{column_name}: {e}" - ) + logger.error(f"Failed to migrate {table_name}.{column_name}: {e}") raise DataJointError(f"Migration failed: {e}") from e result["details"].append(detail) @@ -897,9 +891,7 @@ def migrate_filepath( detail["action"] = "finalize" if dry_run: - logger.info( - f"Would finalize {database}.{table_name}.{column_name}" - ) + logger.info(f"Would finalize {database}.{table_name}.{column_name}") detail["status"] = "dry_run" else: try: @@ -952,14 +944,12 @@ def migrate_filepath( count = connection.query(count_sql).fetchone()[0] detail["rows"] = count detail["status"] = "dry_run" - logger.info( - f"Would migrate {database}.{table_name}.{column_name}: " - f"{count} rows" - ) + logger.info(f"Would migrate {database}.{table_name}.{column_name}: " f"{count} rows") else: try: # Get store config from .settings import config + store_config = config.get("stores", {}).get(store_name, {}) protocol = store_config.get("protocol", "file") location = store_config.get("location", "") @@ -992,10 +982,7 @@ def migrate_filepath( result["columns_migrated"] += 1 result["rows_migrated"] += count - logger.info( - f"Migrated {database}.{table_name}.{column_name}: " - f"{count} rows" - ) + logger.info(f"Migrated {database}.{table_name}.{column_name}: " f"{count} rows") except Exception as e: detail["status"] = "error" detail["error"] = str(e) diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py index 399ab1b9f..98faa83f2 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -216,7 +216,7 @@ def __call__(self, cls: type, *, context: dict[str, Any] | None = None) -> type: cls : type Table class to decorate. context : dict, optional - Declaration context. Supplied by spawn_missing_classes. + Declaration context. Supplied by make_classes. Returns ------- @@ -335,39 +335,39 @@ def size_on_disk(self) -> int: ).fetchone()[0] ) - def spawn_missing_classes(self, context: dict[str, Any] | None = None) -> None: + def make_classes(self, into: dict[str, Any] | None = None) -> None: """ - Create Python table classes for tables without existing classes. + Create Python table classes for tables in the schema. Introspects the database schema and creates appropriate Python classes (Lookup, Manual, Imported, Computed, Part) for tables that don't have - corresponding classes in the context. + corresponding classes in the target namespace. Parameters ---------- - context : dict, optional + into : dict, optional Namespace to place created classes into. Defaults to caller's local namespace. 
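+
+        Examples
+        --------
+        A minimal sketch (schema name is illustrative)::
+
+            schema = dj.Schema('my_lab')
+            schema.make_classes()                # classes land in the caller's namespace
+            schema.make_classes(into=globals())  # or target an explicit namespace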
""" self._assert_exists() - if context is None: + if into is None: if self.context is not None: - context = self.context + into = self.context else: - # if context is missing, use the calling namespace + # if into is missing, use the calling namespace frame = inspect.currentframe().f_back - context = frame.f_locals + into = frame.f_locals del frame tables = [ row[0] for row in self.connection.query("SHOW TABLES in `%s`" % self.database) - if lookup_class_name("`{db}`.`{tab}`".format(db=self.database, tab=row[0]), context, 0) is None + if lookup_class_name("`{db}`.`{tab}`".format(db=self.database, tab=row[0]), into, 0) is None ] master_classes = (Lookup, Manual, Imported, Computed) part_tables = [] for table_name in tables: class_name = to_camel_case(table_name) - if class_name not in context: + if class_name not in into: try: cls = next(cls for cls in master_classes if re.fullmatch(cls.tier_regexp, table_name)) except StopIteration: @@ -375,19 +375,19 @@ def spawn_missing_classes(self, context: dict[str, Any] | None = None) -> None: part_tables.append(table_name) else: # declare and decorate master table classes - context[class_name] = self(type(class_name, (cls,), dict()), context=context) + into[class_name] = self(type(class_name, (cls,), dict()), context=into) # attach parts to masters for table_name in part_tables: groups = re.fullmatch(Part.tier_regexp, table_name).groupdict() class_name = to_camel_case(groups["part"]) try: - master_class = context[to_camel_case(groups["master"])] + master_class = into[to_camel_case(groups["master"])] except KeyError: raise DataJointError("The table %s does not follow DataJoint naming conventions" % table_name) part_class = type(class_name, (Part,), dict(definition=...)) part_class._master = master_class - self._decorate_table(part_class, context=context, assert_declared=True) + self._decorate_table(part_class, context=into, assert_declared=True) setattr(master_class, class_name, part_class) def drop(self, prompt: bool | None = None) -> None: @@ -830,7 +830,7 @@ def __init__( if add_objects: self.__dict__.update(add_objects) self.__dict__["schema"] = _schema - _schema.spawn_missing_classes(context=self.__dict__) + _schema.make_classes(into=self.__dict__) def list_schemas(connection: Connection | None = None) -> list[str]: diff --git a/src/datajoint/version.py b/src/datajoint/version.py index 31f651ea6..426a00789 100644 --- a/src/datajoint/version.py +++ b/src/datajoint/version.py @@ -1,4 +1,4 @@ # version bump auto managed by Github Actions: # label_prs.yaml(prep), release.yaml(bump), post_release.yaml(edit) # manually set this version will be eventually overwritten by the above actions -__version__ = "2.0.0a16" +__version__ = "2.0.0a17" diff --git a/tests/integration/test_codecs.py b/tests/integration/test_codecs.py index 6d160e5b5..f4ed7483a 100644 --- a/tests/integration/test_codecs.py +++ b/tests/integration/test_codecs.py @@ -39,9 +39,9 @@ def schema_codec( @pytest.fixture def local_schema(schema_codec, schema_name): - """Fixture for testing spawned classes""" + """Fixture for testing generated classes""" local_schema = dj.Schema(schema_name, connection=schema_codec.connection) - local_schema.spawn_missing_classes() + local_schema.make_classes() yield local_schema # Don't drop - schema_codec fixture handles cleanup diff --git a/tests/integration/test_npy_codec.py b/tests/integration/test_npy_codec.py new file mode 100644 index 000000000..b5438c68b --- /dev/null +++ b/tests/integration/test_npy_codec.py @@ -0,0 +1,439 @@ +""" +Tests for the 
NpyCodec - schema-addressed numpy array storage. + +These tests verify: +- NpyCodec encode/decode roundtrip +- NpyRef lazy loading behavior +- NpyRef metadata access without I/O +- NpyRef numpy integration via __array__ +- Schema-addressed path construction +""" + +import numpy as np +import pytest + +import datajoint as dj +from datajoint.builtin_codecs import NpyCodec, NpyRef, SchemaCodec + + +# ============================================================================= +# Test Schema Definition +# ============================================================================= + + +class Recording(dj.Manual): + definition = """ + recording_id : int + --- + waveform : + """ + + +class MultiArray(dj.Manual): + definition = """ + item_id : int + --- + small_array : + large_array : + """ + + +LOCALS_NPY = {"Recording": Recording, "MultiArray": MultiArray} + + +# ============================================================================= +# Fixtures +# ============================================================================= + + +@pytest.fixture +def schema_name(prefix): + return prefix + "_test_npy_codec" + + +@pytest.fixture +def schema_npy(connection_test, s3_creds, tmpdir, schema_name, mock_stores): + """Create schema with NpyCodec tables.""" + # mock_stores fixture sets up object_storage.stores with repo-s3, etc. + context = dict(LOCALS_NPY) + schema = dj.schema(schema_name, context=context, connection=connection_test) + schema(Recording) + schema(MultiArray) + yield schema + schema.drop() + + +# ============================================================================= +# Unit Tests (no database required) +# ============================================================================= + + +class TestNpyRefUnit: + """Unit tests for NpyRef without database.""" + + def test_npy_ref_metadata_access(self): + """NpyRef should provide metadata without I/O.""" + # Mock metadata as would be stored in JSON + metadata = { + "path": "test/recording/recording_id=1/waveform.npy", + "store": "default", + "dtype": "float64", + "shape": [1000, 32], + } + + # Create NpyRef with mock backend + class MockBackend: + def get_buffer(self, path): + raise AssertionError("Should not be called for metadata access") + + ref = NpyRef(metadata, MockBackend()) + + # These should NOT trigger I/O + assert ref.shape == (1000, 32) + assert ref.dtype == np.dtype("float64") + assert ref.ndim == 2 + assert ref.size == 32000 + assert ref.nbytes == 32000 * 8 # float64 = 8 bytes + assert ref.path == "test/recording/recording_id=1/waveform.npy" + assert ref.store == "default" + assert ref.is_loaded is False + + def test_npy_ref_repr(self): + """NpyRef repr should show shape, dtype, and load status.""" + metadata = { + "path": "test.npy", + "store": None, + "dtype": "int32", + "shape": [100], + } + + class MockBackend: + pass + + ref = NpyRef(metadata, MockBackend()) + repr_str = repr(ref) + + assert "NpyRef" in repr_str + assert "(100,)" in repr_str + assert "int32" in repr_str + assert "not loaded" in repr_str + + def test_npy_ref_len(self): + """NpyRef should support len() for first dimension.""" + metadata = {"path": "test.npy", "store": None, "dtype": "float32", "shape": [50, 10]} + + class MockBackend: + pass + + ref = NpyRef(metadata, MockBackend()) + assert len(ref) == 50 + + def test_npy_ref_len_0d_raises(self): + """NpyRef len() should raise for 0-d arrays.""" + metadata = {"path": "test.npy", "store": None, "dtype": "float32", "shape": []} + + class MockBackend: + pass + + ref = NpyRef(metadata, 
MockBackend()) + with pytest.raises(TypeError, match="0-dimensional"): + len(ref) + + +class TestNpyCodecUnit: + """Unit tests for NpyCodec without database.""" + + def test_codec_is_schema_codec(self): + """NpyCodec should inherit from SchemaCodec.""" + codec = NpyCodec() + assert isinstance(codec, SchemaCodec) + + def test_codec_name(self): + """NpyCodec should be registered as 'npy'.""" + codec = NpyCodec() + assert codec.name == "npy" + + def test_codec_requires_store(self): + """NpyCodec should require @ modifier.""" + codec = NpyCodec() + + # Should raise without @ + with pytest.raises(dj.DataJointError, match="requires @"): + codec.get_dtype(is_store=False) + + # Should return json with @ + assert codec.get_dtype(is_store=True) == "json" + + def test_codec_validate_requires_ndarray(self): + """NpyCodec should reject non-ndarray values.""" + codec = NpyCodec() + + # Should reject list + with pytest.raises(dj.DataJointError, match="requires numpy.ndarray"): + codec.validate([1, 2, 3]) + + # Should reject dict + with pytest.raises(dj.DataJointError, match="requires numpy.ndarray"): + codec.validate({"data": [1, 2, 3]}) + + # Should accept ndarray + codec.validate(np.array([1, 2, 3])) # No exception + + def test_codec_validate_rejects_object_dtype(self): + """NpyCodec should reject object dtype arrays.""" + codec = NpyCodec() + + obj_array = np.array([{}, []], dtype=object) + with pytest.raises(dj.DataJointError, match="object dtype"): + codec.validate(obj_array) + + +# ============================================================================= +# Integration Tests (require database + MinIO) +# ============================================================================= + + +class TestNpyCodecIntegration: + """Integration tests for NpyCodec with real storage.""" + + def test_insert_fetch_roundtrip(self, schema_npy, minio_client): + """Basic insert and fetch should preserve array data.""" + rec = Recording() + rec.delete() + + # Insert array + original = np.random.randn(100, 32).astype(np.float64) + rec.insert1({"recording_id": 1, "waveform": original}) + + # Fetch returns NpyRef + result = rec.fetch1("waveform") + assert isinstance(result, NpyRef) + + # Load and compare + loaded = result.load() + assert isinstance(loaded, np.ndarray) + np.testing.assert_array_equal(loaded, original) + + rec.delete() + + def test_npy_ref_caching(self, schema_npy, minio_client): + """NpyRef should cache loaded data.""" + rec = Recording() + rec.delete() + + original = np.array([1, 2, 3, 4, 5]) + rec.insert1({"recording_id": 1, "waveform": original}) + + ref = rec.fetch1("waveform") + + # First load + arr1 = ref.load() + assert ref.is_loaded is True + + # Second load should return same object (cached) + arr2 = ref.load() + assert arr1 is arr2 + + rec.delete() + + def test_npy_ref_array_protocol(self, schema_npy, minio_client): + """NpyRef should work transparently in numpy operations.""" + rec = Recording() + rec.delete() + + original = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + rec.insert1({"recording_id": 1, "waveform": original}) + + ref = rec.fetch1("waveform") + + # __array__ is triggered by numpy functions, not Python operators + # Use np.asarray() or pass to numpy functions + result = np.asarray(ref) + 1 + np.testing.assert_array_equal(result, original + 1) + + result = np.mean(ref) + assert result == np.mean(original) + + result = np.asarray(ref) + np.testing.assert_array_equal(result, original) + + # Also test that numpy ufuncs work + result = np.add(ref, 1) + np.testing.assert_array_equal(result, 
original + 1) + + rec.delete() + + def test_npy_ref_indexing(self, schema_npy, minio_client): + """NpyRef should support indexing/slicing.""" + rec = Recording() + rec.delete() + + original = np.arange(100).reshape(10, 10) + rec.insert1({"recording_id": 1, "waveform": original}) + + ref = rec.fetch1("waveform") + + # Indexing + assert ref[0, 0] == 0 + assert ref[5, 5] == 55 + + # Slicing + np.testing.assert_array_equal(ref[0:2], original[0:2]) + np.testing.assert_array_equal(ref[:, 0], original[:, 0]) + + rec.delete() + + def test_bulk_fetch_lazy(self, schema_npy, minio_client): + """Fetching via to_dicts should return NpyRefs that are lazy.""" + rec = Recording() + rec.delete() + + # Insert multiple arrays + for i in range(5): + rec.insert1({"recording_id": i, "waveform": np.random.randn(10, 10)}) + + # Fetch all using to_dicts - should return NpyRefs + results = rec.to_dicts() + assert len(results) == 5 + + refs = [r["waveform"] for r in results] + for ref in refs: + assert isinstance(ref, NpyRef) + assert ref.is_loaded is False # Not loaded yet + + # Access metadata without loading + shapes = [ref.shape for ref in refs] + assert all(s == (10, 10) for s in shapes) + assert all(not ref.is_loaded for ref in refs) # Still not loaded + + # Now load one + refs[0].load() + assert refs[0].is_loaded is True + assert not refs[1].is_loaded # Others still not loaded + + rec.delete() + + def test_different_dtypes(self, schema_npy, minio_client): + """NpyCodec should handle various numpy dtypes.""" + rec = Recording() + rec.delete() + + test_cases = [ + (1, np.array([1, 2, 3], dtype=np.int32)), + (2, np.array([1.0, 2.0, 3.0], dtype=np.float32)), + (3, np.array([1.0, 2.0, 3.0], dtype=np.float64)), + (4, np.array([True, False, True], dtype=np.bool_)), + (5, np.array([1 + 2j, 3 + 4j], dtype=np.complex128)), + ] + + for rec_id, arr in test_cases: + rec.insert1({"recording_id": rec_id, "waveform": arr}) + + for rec_id, original in test_cases: + ref = (rec & f"recording_id={rec_id}").fetch1("waveform") + loaded = ref.load() + assert loaded.dtype == original.dtype + np.testing.assert_array_equal(loaded, original) + + rec.delete() + + def test_multidimensional_arrays(self, schema_npy, minio_client): + """NpyCodec should handle various array shapes.""" + rec = Recording() + rec.delete() + + test_cases = [ + (1, np.array([1, 2, 3])), # 1D + (2, np.array([[1, 2], [3, 4]])), # 2D + (3, np.random.randn(2, 3, 4)), # 3D + (4, np.random.randn(2, 3, 4, 5)), # 4D + (5, np.array(42)), # 0D scalar + ] + + for rec_id, arr in test_cases: + rec.insert1({"recording_id": rec_id, "waveform": arr}) + + for rec_id, original in test_cases: + ref = (rec & f"recording_id={rec_id}").fetch1("waveform") + assert ref.shape == original.shape + assert ref.ndim == original.ndim + loaded = ref.load() + np.testing.assert_array_equal(loaded, original) + + rec.delete() + + def test_schema_addressed_path(self, schema_npy, minio_client): + """NpyCodec should store files with .npy extension.""" + rec = Recording() + rec.delete() + + rec.insert1({"recording_id": 42, "waveform": np.array([1, 2, 3])}) + + ref = rec.fetch1("waveform") + path = ref.path + + # Path should end with .npy extension + assert path.endswith(".npy"), f"Path should end with .npy, got: {path}" + + # Verify the file can be loaded + arr = ref.load() + np.testing.assert_array_equal(arr, np.array([1, 2, 3])) + + rec.delete() + + +class TestNpyCodecEdgeCases: + """Edge case tests for NpyCodec.""" + + def test_empty_array(self, schema_npy, minio_client): + """NpyCodec should handle 
empty arrays.""" + rec = Recording() + rec.delete() + + empty = np.array([]) + rec.insert1({"recording_id": 1, "waveform": empty}) + + ref = rec.fetch1("waveform") + assert ref.shape == (0,) + assert ref.size == 0 + + loaded = ref.load() + np.testing.assert_array_equal(loaded, empty) + + rec.delete() + + def test_large_array(self, schema_npy, minio_client): + """NpyCodec should handle large arrays.""" + rec = Recording() + rec.delete() + + # 10MB array + large = np.random.randn(1000, 1000).astype(np.float64) + rec.insert1({"recording_id": 1, "waveform": large}) + + ref = rec.fetch1("waveform") + assert ref.shape == (1000, 1000) + assert ref.nbytes == 8_000_000 + + loaded = ref.load() + np.testing.assert_array_equal(loaded, large) + + rec.delete() + + def test_structured_array(self, schema_npy, minio_client): + """NpyCodec should handle structured arrays.""" + rec = Recording() + rec.delete() + + dt = np.dtype([("x", np.float64), ("y", np.float64), ("label", "U10")]) + structured = np.array([(1.0, 2.0, "a"), (3.0, 4.0, "b")], dtype=dt) + + rec.insert1({"recording_id": 1, "waveform": structured}) + + ref = rec.fetch1("waveform") + loaded = ref.load() + + assert loaded.dtype == structured.dtype + np.testing.assert_array_equal(loaded, structured) + + rec.delete() diff --git a/tests/integration/test_privileges.py b/tests/integration/test_privileges.py index 0939823a0..763e7c04b 100644 --- a/tests/integration/test_privileges.py +++ b/tests/integration/test_privileges.py @@ -81,11 +81,11 @@ def test_fail_create_schema(self, connection_djview): def test_insert_failure(self, connection_djview, schema_any): unprivileged = dj.Schema(schema_any.database, namespace, connection=connection_djview) - unprivileged.spawn_missing_classes() + unprivileged.make_classes() UnprivilegedLanguage = namespace["Language"] assert issubclass(UnprivilegedLanguage, dj.Lookup) and len(UnprivilegedLanguage()) == len( schema.Language() - ), "failed to spawn missing classes" + ), "failed to make classes" with pytest.raises(dj.DataJointError): UnprivilegedLanguage().insert1(("Socrates", "Greek")) diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index 8cf231bf5..6ef615466 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -29,7 +29,7 @@ def schema_empty_module(schema_any, schema_empty): """ Mock the module tests_old.schema_empty. The test `test_namespace_population` will check that the module contains all the - classes in schema_any, after running `spawn_missing_classes`. + classes in schema_any, after running `make_classes`. """ namespace_dict = { "_": schema_any, @@ -51,7 +51,7 @@ def schema_empty(connection_test, schema_any, prefix): schema_empty = dj.Schema(prefix + "_test1", context=context, connection=connection_test) schema_empty(Ephys) # load the rest of the classes - schema_empty.spawn_missing_classes(context=context) + schema_empty.make_classes(into=context) yield schema_empty # Don't drop the schema since schema_any still needs it @@ -77,12 +77,12 @@ def test_drop_unauthorized(connection_test): def test_namespace_population(schema_empty_module): """ With the schema_empty_module fixture, this test - mimics the behavior of `spawn_missing_classes`, as if the schema - was declared in a separate module and `spawn_missing_classes` was called in that namespace. + mimics the behavior of `make_classes`, as if the schema + was declared in a separate module and `make_classes` was called in that namespace. 
""" - # Spawn missing classes in the caller's (self) namespace. + # Create classes in the caller's (self) namespace. schema_empty_module.schema.context = None - schema_empty_module.schema.spawn_missing_classes(context=None) + schema_empty_module.schema.make_classes(into=None) # Then add them to the mock module's namespace. for k, v in locals().items(): if inspect.isclass(v): diff --git a/tests/schema_codecs.py b/tests/schema_codecs.py index 6a8d478d4..97307f985 100644 --- a/tests/schema_codecs.py +++ b/tests/schema_codecs.py @@ -10,7 +10,7 @@ class GraphCodec(dj.Codec): name = "graph" - def get_dtype(self, is_external: bool) -> str: + def get_dtype(self, is_store: bool) -> str: """Chain to blob for serialization.""" return "" @@ -29,7 +29,7 @@ class LayoutCodec(dj.Codec): name = "layout" - def get_dtype(self, is_external: bool) -> str: + def get_dtype(self, is_store: bool) -> str: """Chain to blob for serialization.""" return "" diff --git a/tests/unit/test_codecs.py b/tests/unit/test_codecs.py index ada626748..9e0460ca6 100644 --- a/tests/unit/test_codecs.py +++ b/tests/unit/test_codecs.py @@ -368,8 +368,8 @@ def test_blob_properties(self): """Test BlobCodec properties.""" blob_codec = get_codec("blob") assert blob_codec.name == "blob" - assert blob_codec.get_dtype(is_external=False) == "bytes" - assert blob_codec.get_dtype(is_external=True) == "" + assert blob_codec.get_dtype(is_store=False) == "bytes" + assert blob_codec.get_dtype(is_store=True) == "" def test_blob_encode_decode_roundtrip(self): """Test that encode/decode is a proper roundtrip.""" From 588751b845ac32183e33e4ebe02f075ac7fb155d Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 12 Jan 2026 16:29:37 -0600 Subject: [PATCH 03/10] fix: Pass schema context to codec encode for schema-addressed paths The SchemaCodec (used by NpyCodec and ObjectCodec) needs _schema, _table, _field, and primary key values to construct schema-addressed storage paths. Previously, key=None was passed, resulting in "unknown/unknown" paths. 
Now builds proper context dict from table metadata and row values, enabling navigable paths like: {schema}/{table}/objects/{pk_path}/{attribute}.npy Co-Authored-By: Claude Opus 4.5 --- src/datajoint/table.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/datajoint/table.py b/src/datajoint/table.py index 0040943c5..4fa0599d8 100644 --- a/src/datajoint/table.py +++ b/src/datajoint/table.py @@ -1177,6 +1177,19 @@ def __make_placeholder(self, name, value, ignore_extra_fields=False, row=None): # Resolve full type chain _, type_chain, resolved_store = resolve_dtype(f"<{attr.codec.name}>", store_name=attr.store) + # Build context dict for schema-addressed codecs + # Include _schema, _table, _field, and primary key values + context = { + "_schema": self.database, + "_table": self.table_name, + "_field": name, + } + # Add primary key values from row if available + if row is not None: + for pk_name in self.primary_key: + if pk_name in row: + context[pk_name] = row[pk_name] + # Apply encoders from outermost to innermost for attr_type in type_chain: # Pass store_name to encoders that support it (check via introspection) @@ -1184,9 +1197,9 @@ def __make_placeholder(self, name, value, ignore_extra_fields=False, row=None): sig = inspect.signature(attr_type.encode) if "store_name" in sig.parameters: - value = attr_type.encode(value, key=None, store_name=resolved_store) + value = attr_type.encode(value, key=context, store_name=resolved_store) else: - value = attr_type.encode(value, key=None) + value = attr_type.encode(value, key=context) # Handle NULL values if value is None or (attr.numeric and (value == "" or np.isnan(float(value)))): From 9f6826efb2a793b7eb833333bcedbdb7d16476f2 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 12 Jan 2026 16:55:29 -0600 Subject: [PATCH 04/10] chore: Merge enhance/blob-preview-display and bump to 2.0.0a18 Merge PR #1330 (blob preview display) into feature/npy-codec. Bump version from 2.0.0a17 to 2.0.0a18. Co-Authored-By: Claude Opus 4.5 --- src/datajoint/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datajoint/version.py b/src/datajoint/version.py index 426a00789..cc70cfae4 100644 --- a/src/datajoint/version.py +++ b/src/datajoint/version.py @@ -1,4 +1,4 @@ # version bump auto managed by Github Actions: # label_prs.yaml(prep), release.yaml(bump), post_release.yaml(edit) # manually set this version will be eventually overwritten by the above actions -__version__ = "2.0.0a17" +__version__ = "2.0.0a18" From acfaf0e858ade3278db9455e850e3f45cec5f3d3 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 12 Jan 2026 17:00:52 -0600 Subject: [PATCH 05/10] fix: Raise error instead of returning 'bytes' for missing field Address reviewer feedback from PR #1330: attr should never be None since field_name comes from heading.names. Raising an error surfaces bugs immediately rather than silently returning a misleading placeholder. 
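For illustration, a minimal stand-alone sketch of the guard this patch introduces
(hypothetical helper name; the actual change is to _get_blob_placeholder in the
diff below, and the codec-based placeholder logic after the check is unchanged):

    from datajoint.errors import DataJointError

    def placeholder_for(heading, field_name):
        # field_name is taken from heading.names, so a missing attribute
        # indicates a bug rather than a value that should be displayed
        attr = heading.attributes.get(field_name)
        if attr is None:
            raise DataJointError(f"Field '{field_name}' not found in heading")
        return attr  # codec-based placeholder logic continues as before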
Co-Authored-By: Claude Opus 4.5 --- src/datajoint/preview.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/datajoint/preview.py b/src/datajoint/preview.py index c0f103eb1..1710bff6b 100644 --- a/src/datajoint/preview.py +++ b/src/datajoint/preview.py @@ -26,9 +26,11 @@ def _format_object_display(json_data): def _get_blob_placeholder(heading, field_name, html_escape=False): """Get display placeholder for a blob/json field based on its codec.""" + from .errors import DataJointError + attr = heading.attributes.get(field_name) if attr is None: - return "bytes" + raise DataJointError(f"Field '{field_name}' not found in heading") if attr.codec is not None: name = attr.codec.name if html_escape: From 12ea8140f017698acd4143db08861611a9e58472 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 12 Jan 2026 18:09:29 -0600 Subject: [PATCH 06/10] feat: Add mmap_mode parameter to NpyRef.load() Support memory-mapped loading for large arrays: - Local filesystem stores: mmap directly, no download - Remote stores: download to cache, then mmap Co-Authored-By: Claude Opus 4.5 --- src/datajoint/builtin_codecs.py | 67 ++++++++++++++++++++++++---- tests/integration/test_npy_codec.py | 68 +++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+), 9 deletions(-) diff --git a/src/datajoint/builtin_codecs.py b/src/datajoint/builtin_codecs.py index 499fec846..ff1977242 100644 --- a/src/datajoint/builtin_codecs.py +++ b/src/datajoint/builtin_codecs.py @@ -940,28 +940,77 @@ def is_loaded(self) -> bool: """True if array data has been downloaded and cached.""" return self._cached is not None - def load(self): + def load(self, mmap_mode=None): """ Download and return the array. + Parameters + ---------- + mmap_mode : str, optional + Memory-map mode for lazy, random-access loading of large arrays: + + - ``'r'``: Read-only + - ``'r+'``: Read-write + - ``'c'``: Copy-on-write (changes not saved to disk) + + If None (default), loads entire array into memory. + Returns ------- - numpy.ndarray - The array data. + numpy.ndarray or numpy.memmap + The array data. Returns ``numpy.memmap`` if mmap_mode is specified. Notes ----- - The array is cached after first load. Subsequent calls return - the cached copy without additional I/O. + When ``mmap_mode`` is None, the array is cached after first load. + + For local filesystem stores, memory mapping accesses the file directly + with no download. For remote stores (S3, etc.), the file is downloaded + to a local cache (``{tempdir}/datajoint_mmap/``) before memory mapping. 
+ + Examples + -------- + Standard loading:: + + arr = ref.load() # Loads entire array into memory + + Memory-mapped for random access to large arrays:: + + arr = ref.load(mmap_mode='r') + slice = arr[1000:2000] # Only reads the needed portion from disk """ import io import numpy as np - if self._cached is None: - buffer = self._backend.get_buffer(self.path) - self._cached = np.load(io.BytesIO(buffer), allow_pickle=False) - return self._cached + if mmap_mode is None: + # Standard loading with caching + if self._cached is None: + buffer = self._backend.get_buffer(self.path) + self._cached = np.load(io.BytesIO(buffer), allow_pickle=False) + return self._cached + else: + # Memory-mapped loading + if self._backend.protocol == "file": + # Local filesystem - mmap directly, no download needed + local_path = self._backend._full_path(self.path) + return np.load(local_path, mmap_mode=mmap_mode, allow_pickle=False) + else: + # Remote storage - download to local cache first + import hashlib + import tempfile + from pathlib import Path + + path_hash = hashlib.md5(self.path.encode()).hexdigest()[:12] + cache_dir = Path(tempfile.gettempdir()) / "datajoint_mmap" + cache_dir.mkdir(exist_ok=True) + cache_path = cache_dir / f"{path_hash}.npy" + + if not cache_path.exists(): + buffer = self._backend.get_buffer(self.path) + cache_path.write_bytes(buffer) + + return np.load(str(cache_path), mmap_mode=mmap_mode, allow_pickle=False) def __array__(self, dtype=None): """ diff --git a/tests/integration/test_npy_codec.py b/tests/integration/test_npy_codec.py index b5438c68b..70e3e098a 100644 --- a/tests/integration/test_npy_codec.py +++ b/tests/integration/test_npy_codec.py @@ -139,6 +139,74 @@ class MockBackend: with pytest.raises(TypeError, match="0-dimensional"): len(ref) + def test_npy_ref_mmap_local_filesystem(self, tmp_path): + """NpyRef mmap_mode should work directly on local filesystem.""" + # Create a real .npy file + test_array = np.arange(100, dtype=np.float64) + npy_path = tmp_path / "test.npy" + np.save(npy_path, test_array) + + metadata = { + "path": "test.npy", + "store": None, + "dtype": "float64", + "shape": [100], + } + + # Mock backend that simulates local filesystem + class MockFileBackend: + protocol = "file" + + def _full_path(self, path): + return str(tmp_path / path) + + def get_buffer(self, path): + return (tmp_path / path).read_bytes() + + ref = NpyRef(metadata, MockFileBackend()) + + # Load with mmap_mode + mmap_arr = ref.load(mmap_mode="r") + + # Should be a memmap + assert isinstance(mmap_arr, np.memmap) + np.testing.assert_array_equal(mmap_arr, test_array) + + # Standard load should still work and cache + regular_arr = ref.load() + assert isinstance(regular_arr, np.ndarray) + assert not isinstance(regular_arr, np.memmap) + np.testing.assert_array_equal(regular_arr, test_array) + + def test_npy_ref_mmap_remote_storage(self, tmp_path): + """NpyRef mmap_mode should download to cache for remote storage.""" + # Create test data + test_array = np.array([1, 2, 3, 4, 5], dtype=np.int32) + npy_buffer = np.save(tmp_path / "temp.npy", test_array) + npy_bytes = (tmp_path / "temp.npy").read_bytes() + + metadata = { + "path": "remote/path/data.npy", + "store": "s3-store", + "dtype": "int32", + "shape": [5], + } + + # Mock backend that simulates remote storage + class MockS3Backend: + protocol = "s3" + + def get_buffer(self, path): + return npy_bytes + + ref = NpyRef(metadata, MockS3Backend()) + + # Load with mmap_mode - should download to cache + mmap_arr = ref.load(mmap_mode="r") + + assert 
isinstance(mmap_arr, np.memmap) + np.testing.assert_array_equal(mmap_arr, test_array) + class TestNpyCodecUnit: """Unit tests for NpyCodec without database.""" From c02a8826a5d7e740573f90d77c5ef0410e10cb72 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 12 Jan 2026 18:21:01 -0600 Subject: [PATCH 07/10] fix: Remove unused variable in mmap test --- tests/integration/test_npy_codec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_npy_codec.py b/tests/integration/test_npy_codec.py index 70e3e098a..bf8a8bcac 100644 --- a/tests/integration/test_npy_codec.py +++ b/tests/integration/test_npy_codec.py @@ -182,7 +182,7 @@ def test_npy_ref_mmap_remote_storage(self, tmp_path): """NpyRef mmap_mode should download to cache for remote storage.""" # Create test data test_array = np.array([1, 2, 3, 4, 5], dtype=np.int32) - npy_buffer = np.save(tmp_path / "temp.npy", test_array) + np.save(tmp_path / "temp.npy", test_array) npy_bytes = (tmp_path / "temp.npy").read_bytes() metadata = { From 0d1ffe7b822cfb3ed30fc6060dd966d693131ec3 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 13 Jan 2026 13:23:35 -0600 Subject: [PATCH 08/10] refactor: Rename content_registry to hash_registry with path-based storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Major changes to hash-addressed storage model: - Rename content_registry.py → hash_registry.py for clarity - Always store full path in metadata (protects against config changes) - Use stored path directly for retrieval (no path regeneration) - Add delete_path() as primary function, deprecate delete_hash() - Add get_size() as primary function, deprecate get_hash_size() - Update gc.py to work with paths instead of hashes - Update builtin_codecs.py HashCodec to use new API This design enables seamless migration from v0.14: - Legacy data keeps old paths in metadata - New data uses new path structure - GC compares stored paths against filesystem Co-Authored-By: Claude Opus 4.5 --- .gitignore | 2 +- pyproject.toml | 2 +- src/datajoint/builtin_codecs.py | 55 +-- src/datajoint/content_registry.py | 231 ------------ src/datajoint/gc.py | 403 +++++++++++---------- src/datajoint/hash_registry.py | 415 ++++++++++++++++++++++ tests/integration/test_content_storage.py | 231 ------------ tests/integration/test_gc.py | 208 +++++------ tests/integration/test_hash_storage.py | 304 ++++++++++++++++ 9 files changed, 1080 insertions(+), 771 deletions(-) delete mode 100644 src/datajoint/content_registry.py create mode 100644 src/datajoint/hash_registry.py delete mode 100644 tests/integration/test_content_storage.py create mode 100644 tests/integration/test_hash_storage.py diff --git a/.gitignore b/.gitignore index 3c88c420c..5079dca62 100644 --- a/.gitignore +++ b/.gitignore @@ -187,7 +187,7 @@ dj_local_conf.json !.vscode/launch.json # pixi environments .pixi -_content/ +_hash/ # Local config .secrets/ diff --git a/pyproject.toml b/pyproject.toml index ef9e622a2..7cd06d786 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -168,7 +168,7 @@ check_untyped_defs = true # Modules with complete type coverage - strict checking enabled [[tool.mypy.overrides]] module = [ - "datajoint.content_registry", + "datajoint.hash_registry", "datajoint.errors", "datajoint.hash", ] diff --git a/src/datajoint/builtin_codecs.py b/src/datajoint/builtin_codecs.py index ff1977242..96c95b7b9 100644 --- a/src/datajoint/builtin_codecs.py +++ b/src/datajoint/builtin_codecs.py @@ -7,8 +7,8 @@ Built-in Codecs: - 
````: Serialize Python objects (internal) or external with dedup - - ````: Hash-addressed storage with MD5 deduplication - - ````: Path-addressed storage for files/folders (Zarr, HDF5) + - ````: Hash-addressed storage with SHA256 deduplication + - ````: Schema-addressed storage for files/folders (Zarr, HDF5) - ````: File attachment (internal) or external with dedup - ````: Reference to existing file in store - ````: Store numpy arrays as portable .npy files (external only) @@ -127,14 +127,16 @@ def decode(self, stored: bytes, *, key: dict | None = None) -> Any: class HashCodec(Codec): """ - Hash-addressed storage with MD5 deduplication. + Hash-addressed storage with SHA256 deduplication. - The ```` codec stores raw bytes using content-addressed storage. - Data is identified by its MD5 hash and stored in a hierarchical directory: + The ```` codec stores raw bytes using hash-addressed storage. + Data is identified by its SHA256 hash and stored in a hierarchical directory: ``_hash/{hash[:2]}/{hash[2:4]}/{hash}`` The database column stores JSON metadata: ``{hash, store, size}``. - Duplicate content is automatically deduplicated. + Duplicate content is automatically deduplicated across all tables. + + Deletion: Requires garbage collection via ``dj.gc.collect()``. External only - requires @ modifier. @@ -154,6 +156,10 @@ class RawContent(dj.Manual): Note: This codec accepts only ``bytes``. For Python objects, use ````. Typically used indirectly via ```` or ```` rather than directly. + + See Also + -------- + datajoint.gc : Garbage collection for orphaned storage. """ name = "hash" @@ -173,38 +179,39 @@ def encode(self, value: bytes, *, key: dict | None = None, store_name: str | Non value : bytes Raw bytes to store. key : dict, optional - Primary key values (unused). + Context dict with ``_schema`` for path isolation. store_name : str, optional Store to use. If None, uses default store. Returns ------- dict - Metadata dict: ``{hash, store, size}``. + Metadata dict: ``{hash, path, schema, store, size}``. """ - from .content_registry import put_content + from .hash_registry import put_hash - return put_content(value, store_name=store_name) + schema_name = (key or {}).get("_schema", "unknown") + return put_hash(value, schema_name=schema_name, store_name=store_name) def decode(self, stored: dict, *, key: dict | None = None) -> bytes: """ - Retrieve content by hash. + Retrieve content using stored metadata. Parameters ---------- stored : dict - Metadata dict with ``'hash'`` and optionally ``'store'``. + Metadata dict with ``'path'``, ``'hash'``, and optionally ``'store'``. key : dict, optional - Primary key values (unused). + Context dict (unused - path is in metadata). Returns ------- bytes Original bytes. """ - from .content_registry import get_content + from .hash_registry import get_hash - return get_content(stored["hash"], store_name=stored.get("store")) + return get_hash(stored) def validate(self, value: Any) -> None: """Validate that value is bytes.""" @@ -366,7 +373,7 @@ def _get_backend(self, store_name: str | None = None): StorageBackend Storage backend instance. """ - from .content_registry import get_store_backend + from .hash_registry import get_store_backend return get_store_backend(store_name) @@ -384,8 +391,8 @@ class ObjectCodec(SchemaCodec): schema-addressed paths: ``{schema}/{table}/{pk}/{field}/``. This creates a browsable organization in object storage that mirrors the database schema. 
- Unlike hash-addressed storage (````), each row has its own path - and content is deleted when the row is deleted. Ideal for: + Unlike hash-addressed storage (````), each row has its own unique path + (no deduplication). Ideal for: - Zarr arrays (hierarchical chunked data) - HDF5 files @@ -419,17 +426,20 @@ def make(self, key): {store_root}/{schema}/{table}/{pk}/{field}/ + Deletion: Requires garbage collection via ``dj.gc.collect()``. + Comparison with hash-addressed:: | Aspect | | | |----------------|---------------------|---------------------| | Addressing | Schema-addressed | Hash-addressed | | Deduplication | No | Yes | - | Deletion | With row | GC when unreferenced| + | Deletion | GC required | GC required | | Use case | Zarr, HDF5 | Blobs, attachments | See Also -------- + datajoint.gc : Garbage collection for orphaned storage. SchemaCodec : Base class for schema-addressed codecs. NpyCodec : Schema-addressed storage for numpy arrays. HashCodec : Hash-addressed storage with deduplication. @@ -782,7 +792,7 @@ def encode(self, value: Any, *, key: dict | None = None, store_name: str | None """ from datetime import datetime, timezone - from .content_registry import get_store_backend + from .hash_registry import get_store_backend path = str(value) @@ -822,7 +832,7 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any: Handle for accessing the file. """ from .objectref import ObjectRef - from .content_registry import get_store_backend + from .hash_registry import get_store_backend store_name = stored.get("store") backend = get_store_backend(store_name) @@ -1103,8 +1113,11 @@ class Recording(dj.Manual): - Path: ``{schema}/{table}/{pk}/{attribute}.npy`` - Database column: JSON with ``{path, store, dtype, shape}`` + Deletion: Requires garbage collection via ``dj.gc.collect()``. + See Also -------- + datajoint.gc : Garbage collection for orphaned storage. NpyRef : The lazy array reference returned on fetch. SchemaCodec : Base class for schema-addressed codecs. ObjectCodec : Schema-addressed storage for files/folders. diff --git a/src/datajoint/content_registry.py b/src/datajoint/content_registry.py deleted file mode 100644 index 70b38324a..000000000 --- a/src/datajoint/content_registry.py +++ /dev/null @@ -1,231 +0,0 @@ -""" -Content-addressed storage registry for DataJoint. - -This module provides content-addressed storage with deduplication for the -Codec. Content is identified by its MD5 hash and stored in a hierarchical -directory structure: _hash/{hash[:2]}/{hash[2:4]}/{hash} - -The ContentRegistry tracks stored content for garbage collection purposes. -""" - -import hashlib -import logging -from typing import Any - -from .errors import DataJointError -from .settings import config -from .storage import StorageBackend - -logger = logging.getLogger(__name__.split(".")[0]) - - -def compute_content_hash(data: bytes) -> str: - """ - Compute SHA256 hash of content. - - Parameters - ---------- - data : bytes - Content bytes. - - Returns - ------- - str - Hex-encoded SHA256 hash (64 characters). - """ - return hashlib.sha256(data).hexdigest() - - -def build_content_path(content_hash: str) -> str: - """ - Build the storage path for content-addressed storage. - - Content is stored in a hierarchical structure to avoid too many files - in a single directory: _content/{hash[:2]}/{hash[2:4]}/{hash} - - Parameters - ---------- - content_hash : str - SHA256 hex hash (64 characters). - - Returns - ------- - str - Relative path within the store. 
- """ - if len(content_hash) != 64: - raise DataJointError(f"Invalid content hash length: {len(content_hash)} (expected 64)") - return f"_content/{content_hash[:2]}/{content_hash[2:4]}/{content_hash}" - - -def get_store_backend(store_name: str | None = None) -> StorageBackend: - """ - Get a StorageBackend for content storage. - - Parameters - ---------- - store_name : str, optional - Name of the store to use. If None, uses the default object storage - configuration or the configured default_store. - - Returns - ------- - StorageBackend - StorageBackend instance. - """ - # If store_name is None, check for configured default_store - if store_name is None and config.object_storage.default_store: - store_name = config.object_storage.default_store - - # get_object_store_spec handles None by returning default object_storage config - spec = config.get_object_store_spec(store_name) - return StorageBackend(spec) - - -def put_content(data: bytes, store_name: str | None = None) -> dict[str, Any]: - """ - Store content using content-addressed storage. - - If the content already exists (same hash), it is not re-uploaded. - Returns metadata including the hash, store, and size. - - Parameters - ---------- - data : bytes - Content bytes to store. - store_name : str, optional - Name of the store. If None, uses default store. - - Returns - ------- - dict[str, Any] - Metadata dict with keys: hash, store, size. - """ - content_hash = compute_content_hash(data) - path = build_content_path(content_hash) - - backend = get_store_backend(store_name) - - # Check if content already exists (deduplication) - if not backend.exists(path): - backend.put_buffer(data, path) - logger.debug(f"Stored new content: {content_hash[:16]}... ({len(data)} bytes)") - else: - logger.debug(f"Content already exists: {content_hash[:16]}...") - - return { - "hash": content_hash, - "store": store_name, - "size": len(data), - } - - -def get_content(content_hash: str, store_name: str | None = None) -> bytes: - """ - Retrieve content by its hash. - - Parameters - ---------- - content_hash : str - SHA256 hex hash of the content. - store_name : str, optional - Name of the store. If None, uses default store. - - Returns - ------- - bytes - Content bytes. - - Raises - ------ - MissingExternalFile - If content is not found. - DataJointError - If hash verification fails. - """ - path = build_content_path(content_hash) - backend = get_store_backend(store_name) - - data = backend.get_buffer(path) - - # Verify hash (optional but recommended for integrity) - actual_hash = compute_content_hash(data) - if actual_hash != content_hash: - raise DataJointError(f"Content hash mismatch: expected {content_hash[:16]}..., got {actual_hash[:16]}...") - - return data - - -def content_exists(content_hash: str, store_name: str | None = None) -> bool: - """ - Check if content exists in storage. - - Parameters - ---------- - content_hash : str - SHA256 hex hash of the content. - store_name : str, optional - Name of the store. If None, uses default store. - - Returns - ------- - bool - True if content exists. - """ - path = build_content_path(content_hash) - backend = get_store_backend(store_name) - return backend.exists(path) - - -def delete_content(content_hash: str, store_name: str | None = None) -> bool: - """ - Delete content from storage. - - This should only be called after verifying no references exist. - Use garbage collection to safely remove unreferenced content. - - Parameters - ---------- - content_hash : str - SHA256 hex hash of the content. 
- store_name : str, optional - Name of the store. If None, uses default store. - - Returns - ------- - bool - True if content was deleted, False if it didn't exist. - - Warnings - -------- - This permanently deletes content. Ensure no references exist first. - """ - path = build_content_path(content_hash) - backend = get_store_backend(store_name) - - if backend.exists(path): - backend.remove(path) - logger.debug(f"Deleted content: {content_hash[:16]}...") - return True - return False - - -def get_content_size(content_hash: str, store_name: str | None = None) -> int: - """ - Get the size of stored content. - - Parameters - ---------- - content_hash : str - SHA256 hex hash of the content. - store_name : str, optional - Name of the store. If None, uses default store. - - Returns - ------- - int - Size in bytes. - """ - path = build_content_path(content_hash) - backend = get_store_backend(store_name) - return backend.size(path) diff --git a/src/datajoint/gc.py b/src/datajoint/gc.py index 7570e6f24..c3f2d6f0f 100644 --- a/src/datajoint/gc.py +++ b/src/datajoint/gc.py @@ -1,25 +1,37 @@ """ Garbage collection for external storage. -This module provides utilities to identify and remove orphaned content -from external storage. Content becomes orphaned when all database rows -referencing it are deleted. +This module provides utilities to identify and remove orphaned items +from external storage. Storage items become orphaned when all database rows +referencing them are deleted. -Supports two storage patterns: -- Content-addressed storage: , , - Stored at: _content/{hash[:2]}/{hash[2:4]}/{hash} +DataJoint uses two external storage patterns: -- Path-addressed storage: - Stored at: {schema}/{table}/objects/{pk}/{field}_{token}/ +Hash-addressed storage + Types: ````, ````, ```` + Path: ``_hash/{schema}/{hash}`` (with optional subfolding) + Deduplication: Per-schema (identical data within a schema shares storage) + Deletion: Requires garbage collection + +Schema-addressed storage + Types: ````, ```` + Path: ``{schema}/{table}/{pk}/{field}/`` + Deduplication: None (each entity has unique path) + Deletion: Requires garbage collection + +Usage:: -Usage: import datajoint as dj - # Scan schemas and find orphaned content + # Scan schemas and find orphaned items stats = dj.gc.scan(schema1, schema2, store_name='mystore') - # Remove orphaned content (dry_run=False to actually delete) + # Remove orphaned items (dry_run=False to actually delete) stats = dj.gc.collect(schema1, schema2, store_name='mystore', dry_run=True) + +See Also +-------- +datajoint.builtin_codecs : Codec implementations for external storage types. """ from __future__ import annotations @@ -28,7 +40,7 @@ import logging from typing import TYPE_CHECKING, Any -from .content_registry import delete_content, get_store_backend +from .hash_registry import delete_path, get_store_backend from .errors import DataJointError if TYPE_CHECKING: @@ -37,14 +49,15 @@ logger = logging.getLogger(__name__.split(".")[0]) -def _uses_content_storage(attr) -> bool: +def _uses_hash_storage(attr) -> bool: """ - Check if an attribute uses content-addressed storage. + Check if an attribute uses hash-addressed storage. 
- This includes types that chain to for external storage: - - directly - - (chains to ) - - (chains to ) + Hash-addressed types use content deduplication via MD5/Base32 hashing: + + - ```` - raw hash storage + - ```` - chains to ```` + - ```` - chains to ```` Parameters ---------- @@ -54,29 +67,33 @@ def _uses_content_storage(attr) -> bool: Returns ------- bool - True if the attribute stores content hashes. + True if the attribute uses hash-addressed storage. """ if not attr.codec: return False - # Check if this type uses content storage codec_name = getattr(attr.codec, "name", "") store = getattr(attr, "store", None) - # always uses content storage (external only) + # always uses hash-addressed storage (external only) if codec_name == "hash": return True - # and use content storage when external (has store) + # and use hash-addressed storage when external if codec_name in ("blob", "attach") and store is not None: return True return False -def _uses_object_storage(attr) -> bool: +def _uses_schema_storage(attr) -> bool: """ - Check if an attribute uses path-addressed object storage. + Check if an attribute uses schema-addressed storage. + + Schema-addressed types store data at paths derived from the schema structure: + + - ```` - arbitrary objects (pickled or native formats) + - ```` - NumPy arrays with lazy loading Parameters ---------- @@ -86,28 +103,31 @@ def _uses_object_storage(attr) -> bool: Returns ------- bool - True if the attribute stores object paths. + True if the attribute uses schema-addressed storage. """ if not attr.codec: return False codec_name = getattr(attr.codec, "name", "") - return codec_name == "object" + return codec_name in ("object", "npy") -def _extract_content_refs(value: Any) -> list[tuple[str, str | None]]: +def _extract_hash_refs(value: Any) -> list[tuple[str, str | None]]: """ - Extract content references from a stored value. + Extract path references from hash-addressed storage metadata. + + Hash-addressed storage stores metadata as JSON with ``path`` and ``hash`` keys. + The path is used for file operations; the hash is for integrity verification. Parameters ---------- value : Any - The stored value (could be JSON string or dict). + The stored value (JSON string or dict). Returns ------- list[tuple[str, str | None]] - List of (content_hash, store_name) tuples. + List of (path, store_name) tuples. """ refs = [] @@ -121,21 +141,23 @@ def _extract_content_refs(value: Any) -> list[tuple[str, str | None]]: except (json.JSONDecodeError, TypeError): return refs - # Extract hash from dict - if isinstance(value, dict) and "hash" in value: - refs.append((value["hash"], value.get("store"))) + # Extract path from dict (path is required for new data, hash for legacy) + if isinstance(value, dict) and "path" in value: + refs.append((value["path"], value.get("store"))) return refs -def _extract_object_refs(value: Any) -> list[tuple[str, str | None]]: +def _extract_schema_refs(value: Any) -> list[tuple[str, str | None]]: """ - Extract object path references from a stored value. + Extract schema-addressed path references from a stored value. + + Schema-addressed storage stores metadata as JSON with a ``path`` key. Parameters ---------- value : Any - The stored value (could be JSON string or dict). + The stored value (JSON string or dict). 
Returns ------- @@ -161,16 +183,17 @@ def _extract_object_refs(value: Any) -> list[tuple[str, str | None]]: return refs -def scan_references( +def scan_hash_references( *schemas: "Schema", store_name: str | None = None, verbose: bool = False, ) -> set[str]: """ - Scan schemas for content references. + Scan schemas for hash-addressed storage references. - Examines all tables in the given schemas and extracts content hashes - from columns that use content-addressed storage (, , ). + Examines all tables in the given schemas and extracts storage paths + from columns that use hash-addressed storage (````, ````, + ````). Parameters ---------- @@ -184,7 +207,7 @@ def scan_references( Returns ------- set[str] - Set of content hashes that are referenced. + Set of storage paths that are referenced. """ referenced: set[str] = set() @@ -198,23 +221,22 @@ def scan_references( # Get table class table = schema.get_table(table_name) - # Check each attribute for content storage + # Check each attribute for hash-addressed storage for attr_name, attr in table.heading.attributes.items(): - if not _uses_content_storage(attr): + if not _uses_hash_storage(attr): continue if verbose: logger.info(f" Scanning {table_name}.{attr_name}") # Fetch all values for this attribute - # Use to_arrays to get attribute values try: values = table.to_arrays(attr_name) for value in values: - for content_hash, ref_store in _extract_content_refs(value): + for path, ref_store in _extract_hash_refs(value): # Filter by store if specified if store_name is None or ref_store == store_name: - referenced.add(content_hash) + referenced.add(path) except Exception as e: logger.warning(f"Error scanning {table_name}.{attr_name}: {e}") @@ -224,16 +246,16 @@ def scan_references( return referenced -def scan_object_references( +def scan_schema_references( *schemas: "Schema", store_name: str | None = None, verbose: bool = False, ) -> set[str]: """ - Scan schemas for object path references. + Scan schemas for schema-addressed storage references. - Examines all tables in the given schemas and extracts object paths - from columns that use path-addressed storage (). + Examines all tables in the given schemas and extracts paths from columns + that use schema-addressed storage (````, ````). Parameters ---------- @@ -247,13 +269,13 @@ def scan_object_references( Returns ------- set[str] - Set of object paths that are referenced. + Set of storage paths that are referenced. 
""" referenced: set[str] = set() for schema in schemas: if verbose: - logger.info(f"Scanning schema for objects: {schema.database}") + logger.info(f"Scanning schema for schema-addressed storage: {schema.database}") # Get all tables in schema for table_name in schema.list_tables(): @@ -261,9 +283,9 @@ def scan_object_references( # Get table class table = schema.get_table(table_name) - # Check each attribute for object storage + # Check each attribute for schema-addressed storage for attr_name, attr in table.heading.attributes.items(): - if not _uses_object_storage(attr): + if not _uses_schema_storage(attr): continue if verbose: @@ -273,7 +295,7 @@ def scan_object_references( try: values = table.to_arrays(attr_name) for value in values: - for path, ref_store in _extract_object_refs(value): + for path, ref_store in _extract_schema_refs(value): # Filter by store if specified if store_name is None or ref_store == store_name: referenced.add(path) @@ -286,12 +308,13 @@ def scan_object_references( return referenced -def list_stored_content(store_name: str | None = None) -> dict[str, int]: +def list_stored_hashes(store_name: str | None = None) -> dict[str, int]: """ - List all content hashes in storage. + List all hash-addressed items in storage. - Scans the _content/ directory in the specified store and returns - all content hashes found. + Scans the ``_hash/`` directory in the specified store and returns + all storage paths found. These correspond to ````, ````, + and ```` types. Parameters ---------- @@ -301,17 +324,20 @@ def list_stored_content(store_name: str | None = None) -> dict[str, int]: Returns ------- dict[str, int] - Dict mapping content_hash to size in bytes. + Dict mapping storage path to size in bytes. """ + import re + backend = get_store_backend(store_name) stored: dict[str, int] = {} - # Content is stored at _content/{hash[:2]}/{hash[2:4]}/{hash} - content_prefix = "_content/" + # Hash-addressed storage: _hash/{schema}/{subfolders...}/{hash} + hash_prefix = "_hash/" + # Base32 pattern: 26 lowercase alphanumeric chars + base32_pattern = re.compile(r"^[a-z2-7]{26}$") try: - # List all files under _content/ - full_prefix = backend._full_path(content_prefix) + full_prefix = backend._full_path(hash_prefix) for root, dirs, files in backend.fs.walk(full_prefix): for filename in files: @@ -319,33 +345,36 @@ def list_stored_content(store_name: str | None = None) -> dict[str, int]: if filename.endswith(".manifest.json"): continue - # The filename is the full hash + # The filename is the base32 hash content_hash = filename - # Validate it looks like a hash (64 hex chars) - if len(content_hash) == 64 and all(c in "0123456789abcdef" for c in content_hash): + # Validate it looks like a base32 hash + if base32_pattern.match(content_hash): try: file_path = f"{root}/{filename}" size = backend.fs.size(file_path) - stored[content_hash] = size + # Build relative path for comparison with stored metadata + # Path format: _hash/{schema}/{subfolders...}/{hash} + relative_path = file_path.replace(backend._full_path(""), "").lstrip("/") + stored[relative_path] = size except Exception: - stored[content_hash] = 0 + pass except FileNotFoundError: - # No _content/ directory exists yet + # No _hash/ directory exists yet pass except Exception as e: - logger.warning(f"Error listing stored content: {e}") + logger.warning(f"Error listing stored hashes: {e}") return stored -def list_stored_objects(store_name: str | None = None) -> dict[str, int]: +def list_schema_paths(store_name: str | None = None) -> dict[str, 
int]: """ - List all object paths in storage. + List all schema-addressed items in storage. - Scans for directories matching the object storage pattern: - {schema}/{table}/objects/{pk}/{field}_{token}/ + Scans for directories matching the schema-addressed storage pattern: + ``{schema}/{table}/{pk}/{field}/`` Parameters ---------- @@ -355,55 +384,57 @@ def list_stored_objects(store_name: str | None = None) -> dict[str, int]: Returns ------- dict[str, int] - Dict mapping object_path to size in bytes. + Dict mapping storage path to size in bytes. """ backend = get_store_backend(store_name) stored: dict[str, int] = {} try: - # Walk the storage looking for /objects/ directories + # Walk the storage looking for schema-addressed paths full_prefix = backend._full_path("") for root, dirs, files in backend.fs.walk(full_prefix): - # Skip _content directory - if "_content" in root: + # Skip _hash directory (hash-addressed storage) + if "_hash" in root: continue - # Look for "objects" directory pattern - if "/objects/" in root: - # This could be an object storage path - # Path pattern: {schema}/{table}/objects/{pk}/{field}_{token} - relative_path = root.replace(full_prefix, "").lstrip("/") + # Look for schema-addressed pattern (has files, not in _hash) + # Schema-addressed paths: {schema}/{table}/{pk}/{field}/ + relative_path = root.replace(full_prefix, "").lstrip("/") - # Calculate total size of this object directory - total_size = 0 - for file in files: - try: - file_path = f"{root}/{file}" - total_size += backend.fs.size(file_path) - except Exception: - pass + # Skip empty paths and root-level directories + if not relative_path or relative_path.count("/") < 2: + continue + + # Calculate total size of this directory + total_size = 0 + for file in files: + try: + file_path = f"{root}/{file}" + total_size += backend.fs.size(file_path) + except Exception: + pass - # Only count directories with files (actual objects) - if total_size > 0 or files: - stored[relative_path] = total_size + # Only count directories with files (actual objects) + if total_size > 0 or files: + stored[relative_path] = total_size except FileNotFoundError: pass except Exception as e: - logger.warning(f"Error listing stored objects: {e}") + logger.warning(f"Error listing stored schemas: {e}") return stored -def delete_object(path: str, store_name: str | None = None) -> bool: +def delete_schema_path(path: str, store_name: str | None = None) -> bool: """ - Delete an object directory from storage. + Delete a schema-addressed directory from storage. Parameters ---------- path : str - Object path (relative to store root). + Storage path (relative to store root). store_name : str, optional Store name (None = default store). @@ -419,10 +450,10 @@ def delete_object(path: str, store_name: str | None = None) -> bool: if backend.fs.exists(full_path): # Remove entire directory tree backend.fs.rm(full_path, recursive=True) - logger.debug(f"Deleted object: {path}") + logger.debug(f"Deleted schema path: {path}") return True except Exception as e: - logger.warning(f"Error deleting object {path}: {e}") + logger.warning(f"Error deleting schema path {path}: {e}") return False @@ -433,10 +464,10 @@ def scan( verbose: bool = False, ) -> dict[str, Any]: """ - Scan for orphaned content and objects without deleting. + Scan for orphaned storage items without deleting. - Scans both content-addressed storage (for , , ) - and path-addressed storage (for ). 
+ Scans both hash-addressed storage (for ````, ````, ````) + and schema-addressed storage (for ````, ````). Parameters ---------- @@ -452,50 +483,50 @@ def scan( dict[str, Any] Dict with scan statistics: - - content_referenced: Number of content items referenced in database - - content_stored: Number of content items in storage - - content_orphaned: Number of unreferenced content items - - content_orphaned_bytes: Total size of orphaned content + - hash_referenced: Number of hash items referenced in database + - hash_stored: Number of hash items in storage + - hash_orphaned: Number of unreferenced hash items + - hash_orphaned_bytes: Total size of orphaned hashes - orphaned_hashes: List of orphaned content hashes - - object_referenced: Number of objects referenced in database - - object_stored: Number of objects in storage - - object_orphaned: Number of unreferenced objects - - object_orphaned_bytes: Total size of orphaned objects - - orphaned_paths: List of orphaned object paths + - schema_paths_referenced: Number of schema items referenced in database + - schema_paths_stored: Number of schema items in storage + - schema_paths_orphaned: Number of unreferenced schema items + - schema_paths_orphaned_bytes: Total size of orphaned schema items + - orphaned_paths: List of orphaned schema paths """ if not schemas: raise DataJointError("At least one schema must be provided") - # --- Content-addressed storage --- - content_referenced = scan_references(*schemas, store_name=store_name, verbose=verbose) - content_stored = list_stored_content(store_name) - orphaned_hashes = set(content_stored.keys()) - content_referenced - content_orphaned_bytes = sum(content_stored.get(h, 0) for h in orphaned_hashes) + # --- Hash-addressed storage --- + hash_referenced = scan_hash_references(*schemas, store_name=store_name, verbose=verbose) + hash_stored = list_stored_hashes(store_name) + orphaned_hashes = set(hash_stored.keys()) - hash_referenced + hash_orphaned_bytes = sum(hash_stored.get(h, 0) for h in orphaned_hashes) - # --- Path-addressed storage (objects) --- - object_referenced = scan_object_references(*schemas, store_name=store_name, verbose=verbose) - object_stored = list_stored_objects(store_name) - orphaned_paths = set(object_stored.keys()) - object_referenced - object_orphaned_bytes = sum(object_stored.get(p, 0) for p in orphaned_paths) + # --- Schema-addressed storage --- + schema_paths_referenced = scan_schema_references(*schemas, store_name=store_name, verbose=verbose) + schema_paths_stored = list_schema_paths(store_name) + orphaned_paths = set(schema_paths_stored.keys()) - schema_paths_referenced + schema_paths_orphaned_bytes = sum(schema_paths_stored.get(p, 0) for p in orphaned_paths) return { - # Content-addressed storage stats - "content_referenced": len(content_referenced), - "content_stored": len(content_stored), - "content_orphaned": len(orphaned_hashes), - "content_orphaned_bytes": content_orphaned_bytes, + # Hash-addressed storage stats + "hash_referenced": len(hash_referenced), + "hash_stored": len(hash_stored), + "hash_orphaned": len(orphaned_hashes), + "hash_orphaned_bytes": hash_orphaned_bytes, "orphaned_hashes": sorted(orphaned_hashes), - # Path-addressed storage stats - "object_referenced": len(object_referenced), - "object_stored": len(object_stored), - "object_orphaned": len(orphaned_paths), - "object_orphaned_bytes": object_orphaned_bytes, + # Schema-addressed storage stats + "schema_paths_referenced": len(schema_paths_referenced), + "schema_paths_stored": len(schema_paths_stored), + 
"schema_paths_orphaned": len(orphaned_paths), + "schema_paths_orphaned_bytes": schema_paths_orphaned_bytes, "orphaned_paths": sorted(orphaned_paths), # Combined totals - "referenced": len(content_referenced) + len(object_referenced), - "stored": len(content_stored) + len(object_stored), + "referenced": len(hash_referenced) + len(schema_paths_referenced), + "stored": len(hash_stored) + len(schema_paths_stored), "orphaned": len(orphaned_hashes) + len(orphaned_paths), - "orphaned_bytes": content_orphaned_bytes + object_orphaned_bytes, + "orphaned_bytes": hash_orphaned_bytes + schema_paths_orphaned_bytes, } @@ -506,10 +537,10 @@ def collect( verbose: bool = False, ) -> dict[str, Any]: """ - Remove orphaned content and objects from storage. + Remove orphaned storage items. - Scans the given schemas for content and object references, then removes any - storage items that are not referenced. + Scans the given schemas for storage references, then removes any + items that are not referenced. Parameters ---------- @@ -530,66 +561,66 @@ def collect( - referenced: Total items referenced in database - stored: Total items in storage - orphaned: Total unreferenced items - - content_deleted: Number of content items deleted - - object_deleted: Number of object items deleted + - hash_deleted: Number of hash items deleted + - schema_paths_deleted: Number of schema items deleted - deleted: Total items deleted (0 if dry_run) - bytes_freed: Bytes freed (0 if dry_run) - errors: Number of deletion errors """ - # First scan to find orphaned content and objects + # First scan to find orphaned items stats = scan(*schemas, store_name=store_name, verbose=verbose) - content_deleted = 0 - object_deleted = 0 + hash_deleted = 0 + schema_paths_deleted = 0 bytes_freed = 0 errors = 0 if not dry_run: - # Delete orphaned content (hash-addressed) - if stats["content_orphaned"] > 0: - content_stored = list_stored_content(store_name) + # Delete orphaned hashes + if stats["hash_orphaned"] > 0: + hash_stored = list_stored_hashes(store_name) - for content_hash in stats["orphaned_hashes"]: + for path in stats["orphaned_hashes"]: try: - size = content_stored.get(content_hash, 0) - if delete_content(content_hash, store_name): - content_deleted += 1 + size = hash_stored.get(path, 0) + if delete_path(path, store_name): + hash_deleted += 1 bytes_freed += size if verbose: - logger.info(f"Deleted content: {content_hash[:16]}... 
({size} bytes)") + logger.info(f"Deleted: {path} ({size} bytes)") except Exception as e: errors += 1 - logger.warning(f"Failed to delete content {content_hash[:16]}...: {e}") + logger.warning(f"Failed to delete {path}: {e}") - # Delete orphaned objects (path-addressed) - if stats["object_orphaned"] > 0: - object_stored = list_stored_objects(store_name) + # Delete orphaned schema paths + if stats["schema_paths_orphaned"] > 0: + schema_paths_stored = list_schema_paths(store_name) for path in stats["orphaned_paths"]: try: - size = object_stored.get(path, 0) - if delete_object(path, store_name): - object_deleted += 1 + size = schema_paths_stored.get(path, 0) + if delete_schema_path(path, store_name): + schema_paths_deleted += 1 bytes_freed += size if verbose: - logger.info(f"Deleted object: {path} ({size} bytes)") + logger.info(f"Deleted schema path: {path} ({size} bytes)") except Exception as e: errors += 1 - logger.warning(f"Failed to delete object {path}: {e}") + logger.warning(f"Failed to delete schema path {path}: {e}") return { "referenced": stats["referenced"], "stored": stats["stored"], "orphaned": stats["orphaned"], - "content_deleted": content_deleted, - "object_deleted": object_deleted, - "deleted": content_deleted + object_deleted, + "hash_deleted": hash_deleted, + "schema_paths_deleted": schema_paths_deleted, + "deleted": hash_deleted + schema_paths_deleted, "bytes_freed": bytes_freed, "errors": errors, "dry_run": dry_run, # Include detailed stats - "content_orphaned": stats["content_orphaned"], - "object_orphaned": stats["object_orphaned"], + "hash_orphaned": stats["hash_orphaned"], + "schema_paths_orphaned": stats["schema_paths_orphaned"], } @@ -609,26 +640,26 @@ def format_stats(stats: dict[str, Any]) -> str: """ lines = ["External Storage Statistics:"] - # Show content-addressed storage stats if present - if "content_referenced" in stats: + # Show hash-addressed storage stats if present + if "hash_referenced" in stats: lines.append("") - lines.append("Content-Addressed Storage (, , ):") - lines.append(f" Referenced: {stats['content_referenced']}") - lines.append(f" Stored: {stats['content_stored']}") - lines.append(f" Orphaned: {stats['content_orphaned']}") - if "content_orphaned_bytes" in stats: - size_mb = stats["content_orphaned_bytes"] / (1024 * 1024) + lines.append("Hash-Addressed Storage (, , ):") + lines.append(f" Referenced: {stats['hash_referenced']}") + lines.append(f" Stored: {stats['hash_stored']}") + lines.append(f" Orphaned: {stats['hash_orphaned']}") + if "hash_orphaned_bytes" in stats: + size_mb = stats["hash_orphaned_bytes"] / (1024 * 1024) lines.append(f" Orphaned size: {size_mb:.2f} MB") - # Show path-addressed storage stats if present - if "object_referenced" in stats: + # Show schema-addressed storage stats if present + if "schema_paths_referenced" in stats: lines.append("") - lines.append("Path-Addressed Storage ():") - lines.append(f" Referenced: {stats['object_referenced']}") - lines.append(f" Stored: {stats['object_stored']}") - lines.append(f" Orphaned: {stats['object_orphaned']}") - if "object_orphaned_bytes" in stats: - size_mb = stats["object_orphaned_bytes"] / (1024 * 1024) + lines.append("Schema-Addressed Storage (, ):") + lines.append(f" Referenced: {stats['schema_paths_referenced']}") + lines.append(f" Stored: {stats['schema_paths_stored']}") + lines.append(f" Orphaned: {stats['schema_paths_orphaned']}") + if "schema_paths_orphaned_bytes" in stats: + size_mb = stats["schema_paths_orphaned_bytes"] / (1024 * 1024) lines.append(f" Orphaned size: 
{size_mb:.2f} MB") # Show totals @@ -649,10 +680,10 @@ def format_stats(stats: dict[str, Any]) -> str: lines.append(" [DRY RUN - no changes made]") else: lines.append(f" Deleted: {stats['deleted']}") - if "content_deleted" in stats: - lines.append(f" Content: {stats['content_deleted']}") - if "object_deleted" in stats: - lines.append(f" Objects: {stats['object_deleted']}") + if "hash_deleted" in stats: + lines.append(f" Hash items: {stats['hash_deleted']}") + if "schema_paths_deleted" in stats: + lines.append(f" Schema paths: {stats['schema_paths_deleted']}") freed_mb = stats["bytes_freed"] / (1024 * 1024) lines.append(f" Bytes freed: {freed_mb:.2f} MB") if stats.get("errors", 0) > 0: diff --git a/src/datajoint/hash_registry.py b/src/datajoint/hash_registry.py new file mode 100644 index 000000000..7b286e874 --- /dev/null +++ b/src/datajoint/hash_registry.py @@ -0,0 +1,415 @@ +""" +Hash-addressed storage registry for DataJoint. + +This module provides hash-addressed storage with deduplication for the ```` +codec. Content is identified by a Base32-encoded MD5 hash and stored with +per-schema isolation:: + + _hash/{schema}/{hash} + +With optional subfolding (configured per-store):: + + _hash/{schema}/{fold1}/{fold2}/{hash} + +Subfolding creates directory hierarchies to improve performance on filesystems +that struggle with large directories (ext3, FAT32, NFS). Modern filesystems +(ext4, XFS, ZFS, S3) handle flat directories efficiently. + +**Storage Model:** + +- **Hash** is used for content identification (deduplication, integrity verification) +- **Path** is always stored in metadata and used for all file operations + +This design protects against configuration changes (e.g., subfolding) affecting +existing data. The path stored at insert time is always used for retrieval. + +Hash-addressed storage is used by ````, ````, and ```` types. +Deduplication occurs within each schema. Deletion requires garbage collection +via ``dj.gc.collect()``. + +See Also +-------- +datajoint.gc : Garbage collection for orphaned storage items. +""" + +import base64 +import hashlib +import logging +from typing import Any + +from .errors import DataJointError +from .settings import config +from .storage import StorageBackend + +logger = logging.getLogger(__name__.split(".")[0]) + + +def compute_hash(data: bytes) -> str: + """ + Compute Base32-encoded MD5 hash of content. + + Parameters + ---------- + data : bytes + Content bytes. + + Returns + ------- + str + Base32-encoded hash (26 lowercase characters, no padding). + """ + md5_digest = hashlib.md5(data).digest() + # Base32 encode, remove padding, lowercase for filesystem compatibility + return base64.b32encode(md5_digest).decode("ascii").rstrip("=").lower() + + +def _subfold(name: str, folds: tuple[int, ...]) -> tuple[str, ...]: + """ + Create subfolding hierarchy from a hash string. + + Parameters + ---------- + name : str + Hash string to subfold. + folds : tuple[int, ...] + Lengths of each subfolder level. + + Returns + ------- + tuple[str, ...] + Subfolder names. + + Examples + -------- + >>> _subfold("abcdefgh", (2, 3)) + ('ab', 'cde') + """ + if not folds: + return () + return (name[: folds[0]],) + _subfold(name[folds[0] :], folds[1:]) + + +def build_hash_path( + content_hash: str, + schema_name: str, + subfolding: tuple[int, ...] | None = None, +) -> str: + """ + Build the storage path for hash-addressed storage. 
+ + Path structure without subfolding:: + + _hash/{schema}/{hash} + + Path structure with subfolding (e.g., (2, 2)):: + + _hash/{schema}/{fold1}/{fold2}/{hash} + + Parameters + ---------- + content_hash : str + Base32-encoded hash (26 characters). + schema_name : str + Database/schema name for isolation. + subfolding : tuple[int, ...], optional + Subfolding pattern from store config. None means flat (no subfolding). + + Returns + ------- + str + Relative path within the store. + """ + # Validate hash format (26 base32 chars, lowercase alphanumeric) + if not (len(content_hash) == 26 and content_hash.isalnum() and content_hash.islower()): + raise DataJointError(f"Invalid content hash (expected 26-char lowercase base32): {content_hash}") + + if subfolding: + folds = _subfold(content_hash, subfolding) + fold_path = "/".join(folds) + return f"_hash/{schema_name}/{fold_path}/{content_hash}" + else: + return f"_hash/{schema_name}/{content_hash}" + + +def get_store_backend(store_name: str | None = None) -> StorageBackend: + """ + Get a StorageBackend for hash-addressed storage. + + Parameters + ---------- + store_name : str, optional + Name of the store to use. If None, uses the default object storage + configuration or the configured default_store. + + Returns + ------- + StorageBackend + StorageBackend instance. + """ + # If store_name is None, check for configured default_store + if store_name is None and config.object_storage.default_store: + store_name = config.object_storage.default_store + + # get_object_store_spec handles None by returning default object_storage config + spec = config.get_object_store_spec(store_name) + return StorageBackend(spec) + + +def get_store_subfolding(store_name: str | None = None) -> tuple[int, ...] | None: + """ + Get the subfolding configuration for a store. + + Parameters + ---------- + store_name : str, optional + Name of the store. If None, uses default store. + + Returns + ------- + tuple[int, ...] | None + Subfolding pattern (e.g., (2, 2)) or None for flat storage. + """ + spec = config.get_object_store_spec(store_name) + subfolding = spec.get("subfolding") + if subfolding is not None: + return tuple(subfolding) + return None + + +def put_hash( + data: bytes, + schema_name: str, + store_name: str | None = None, +) -> dict[str, Any]: + """ + Store content using hash-addressed storage. + + If the content already exists (same hash in same schema), it is not + re-uploaded. Returns metadata including the hash, path, store, and size. + + The path is always stored in metadata and used for retrieval, protecting + against configuration changes (e.g., subfolding) affecting existing data. + + Parameters + ---------- + data : bytes + Content bytes to store. + schema_name : str + Database/schema name for path isolation. + store_name : str, optional + Name of the store. If None, uses default store. + + Returns + ------- + dict[str, Any] + Metadata dict with keys: hash, path, schema, store, size. 
+ """ + content_hash = compute_hash(data) + subfolding = get_store_subfolding(store_name) + path = build_hash_path(content_hash, schema_name, subfolding) + + backend = get_store_backend(store_name) + + # Check if content already exists (deduplication within schema) + if not backend.exists(path): + backend.put_buffer(data, path) + logger.debug(f"Stored new hash: {content_hash} ({len(data)} bytes)") + else: + logger.debug(f"Hash already exists: {content_hash}") + + return { + "hash": content_hash, + "path": path, # Always stored for retrieval + "schema": schema_name, + "store": store_name, + "size": len(data), + } + + +def get_hash(metadata: dict[str, Any]) -> bytes: + """ + Retrieve content using stored metadata. + + Uses the stored path directly (not derived from hash) to protect against + configuration changes affecting existing data. + + Parameters + ---------- + metadata : dict + Metadata dict with keys: path, hash, store (optional). + + Returns + ------- + bytes + Content bytes. + + Raises + ------ + MissingExternalFile + If content is not found at the stored path. + DataJointError + If hash verification fails (data corruption). + """ + path = metadata["path"] + expected_hash = metadata["hash"] + store_name = metadata.get("store") + + backend = get_store_backend(store_name) + data = backend.get_buffer(path) + + # Verify hash for integrity + actual_hash = compute_hash(data) + if actual_hash != expected_hash: + raise DataJointError( + f"Hash mismatch: expected {expected_hash}, got {actual_hash}. " f"Data at {path} may be corrupted." + ) + + return data + + +def hash_exists( + content_hash: str, + schema_name: str, + store_name: str | None = None, +) -> bool: + """ + Check if hash-addressed content exists in storage. + + Parameters + ---------- + content_hash : str + Base32-encoded hash (26 characters). + schema_name : str + Database/schema name for path isolation. + store_name : str, optional + Name of the store. If None, uses default store. + + Returns + ------- + bool + True if content exists. + """ + subfolding = get_store_subfolding(store_name) + path = build_hash_path(content_hash, schema_name, subfolding) + backend = get_store_backend(store_name) + return backend.exists(path) + + +def delete_path( + path: str, + store_name: str | None = None, +) -> bool: + """ + Delete content at the specified path from storage. + + This should only be called after verifying no references exist. + Use garbage collection to safely remove unreferenced content. + + Parameters + ---------- + path : str + Storage path (as stored in metadata). + store_name : str, optional + Name of the store. If None, uses default store. + + Returns + ------- + bool + True if content was deleted, False if it didn't exist. + + Warnings + -------- + This permanently deletes content. Ensure no references exist first. + """ + backend = get_store_backend(store_name) + + if backend.exists(path): + backend.remove(path) + logger.debug(f"Deleted: {path}") + return True + return False + + +# Backward compatibility alias +def delete_hash( + content_hash: str, + schema_name: str, + store_name: str | None = None, +) -> bool: + """ + Delete hash-addressed content from storage (deprecated). + + .. deprecated:: + Use :func:`delete_path` with the stored path instead. + + Parameters + ---------- + content_hash : str + Base32-encoded hash (26 characters). + schema_name : str + Database/schema name for path isolation. + store_name : str, optional + Name of the store. If None, uses default store. 
+ + Returns + ------- + bool + True if content was deleted, False if it didn't exist. + """ + subfolding = get_store_subfolding(store_name) + path = build_hash_path(content_hash, schema_name, subfolding) + return delete_path(path, store_name) + + +def get_size( + path: str, + store_name: str | None = None, +) -> int: + """ + Get the size of content at the specified path. + + Parameters + ---------- + path : str + Storage path (as stored in metadata). + store_name : str, optional + Name of the store. If None, uses default store. + + Returns + ------- + int + Size in bytes. + """ + backend = get_store_backend(store_name) + return backend.size(path) + + +# Backward compatibility alias +def get_hash_size( + content_hash: str, + schema_name: str, + store_name: str | None = None, +) -> int: + """ + Get the size of hash-addressed content (deprecated). + + .. deprecated:: + Use :func:`get_size` with the stored path instead. + + Parameters + ---------- + content_hash : str + Base32-encoded hash (26 characters). + schema_name : str + Database/schema name for path isolation. + store_name : str, optional + Name of the store. If None, uses default store. + + Returns + ------- + int + Size in bytes. + """ + subfolding = get_store_subfolding(store_name) + path = build_hash_path(content_hash, schema_name, subfolding) + return get_size(path, store_name) diff --git a/tests/integration/test_content_storage.py b/tests/integration/test_content_storage.py deleted file mode 100644 index e6d0f14cc..000000000 --- a/tests/integration/test_content_storage.py +++ /dev/null @@ -1,231 +0,0 @@ -""" -Tests for content-addressed storage (content_registry.py). -""" - -import hashlib -from unittest.mock import MagicMock, patch - -import pytest - -from datajoint.content_registry import ( - build_content_path, - compute_content_hash, - content_exists, - delete_content, - get_content, - get_content_size, - put_content, -) -from datajoint.errors import DataJointError - - -class TestComputeContentHash: - """Tests for compute_content_hash function.""" - - def test_computes_sha256(self): - """Test that SHA256 hash is computed correctly.""" - data = b"Hello, World!" - result = compute_content_hash(data) - - # Verify against known SHA256 hash - expected = hashlib.sha256(data).hexdigest() - assert result == expected - assert len(result) == 64 # SHA256 produces 64 hex chars - - def test_empty_bytes(self): - """Test hashing empty bytes.""" - result = compute_content_hash(b"") - expected = hashlib.sha256(b"").hexdigest() - assert result == expected - - def test_different_content_different_hash(self): - """Test that different content produces different hashes.""" - hash1 = compute_content_hash(b"content1") - hash2 = compute_content_hash(b"content2") - assert hash1 != hash2 - - def test_same_content_same_hash(self): - """Test that same content produces same hash.""" - data = b"identical content" - hash1 = compute_content_hash(data) - hash2 = compute_content_hash(data) - assert hash1 == hash2 - - -class TestBuildContentPath: - """Tests for build_content_path function.""" - - def test_builds_hierarchical_path(self): - """Test that path is built with proper hierarchy.""" - # Example hash: abcdef... 
- test_hash = "abcdef0123456789" * 4 # 64 chars - result = build_content_path(test_hash) - - # Path should be _content/{hash[:2]}/{hash[2:4]}/{hash} - assert result == f"_content/ab/cd/{test_hash}" - - def test_rejects_invalid_hash_length(self): - """Test that invalid hash length raises error.""" - with pytest.raises(DataJointError, match="Invalid content hash length"): - build_content_path("tooshort") - - with pytest.raises(DataJointError, match="Invalid content hash length"): - build_content_path("a" * 65) # Too long - - def test_real_hash_path(self): - """Test path building with a real computed hash.""" - data = b"test content" - content_hash = compute_content_hash(data) - path = build_content_path(content_hash) - - # Verify structure - parts = path.split("/") - assert parts[0] == "_content" - assert len(parts[1]) == 2 - assert len(parts[2]) == 2 - assert len(parts[3]) == 64 - assert parts[1] == content_hash[:2] - assert parts[2] == content_hash[2:4] - assert parts[3] == content_hash - - -class TestPutContent: - """Tests for put_content function.""" - - @patch("datajoint.content_registry.get_store_backend") - def test_stores_new_content(self, mock_get_backend): - """Test storing new content.""" - mock_backend = MagicMock() - mock_backend.exists.return_value = False - mock_get_backend.return_value = mock_backend - - data = b"new content" - result = put_content(data, store_name="test_store") - - # Verify return value - assert "hash" in result - assert result["hash"] == compute_content_hash(data) - assert result["store"] == "test_store" - assert result["size"] == len(data) - - # Verify backend was called - mock_backend.put_buffer.assert_called_once() - - @patch("datajoint.content_registry.get_store_backend") - def test_deduplicates_existing_content(self, mock_get_backend): - """Test that existing content is not re-uploaded.""" - mock_backend = MagicMock() - mock_backend.exists.return_value = True # Content already exists - mock_get_backend.return_value = mock_backend - - data = b"existing content" - result = put_content(data, store_name="test_store") - - # Verify return value is still correct - assert result["hash"] == compute_content_hash(data) - assert result["size"] == len(data) - - # Verify put_buffer was NOT called (deduplication) - mock_backend.put_buffer.assert_not_called() - - -class TestGetContent: - """Tests for get_content function.""" - - @patch("datajoint.content_registry.get_store_backend") - def test_retrieves_content(self, mock_get_backend): - """Test retrieving content by hash.""" - data = b"stored content" - content_hash = compute_content_hash(data) - - mock_backend = MagicMock() - mock_backend.get_buffer.return_value = data - mock_get_backend.return_value = mock_backend - - result = get_content(content_hash, store_name="test_store") - - assert result == data - - @patch("datajoint.content_registry.get_store_backend") - def test_verifies_hash(self, mock_get_backend): - """Test that hash is verified on retrieval.""" - data = b"original content" - content_hash = compute_content_hash(data) - - # Return corrupted data - mock_backend = MagicMock() - mock_backend.get_buffer.return_value = b"corrupted content" - mock_get_backend.return_value = mock_backend - - with pytest.raises(DataJointError, match="Content hash mismatch"): - get_content(content_hash, store_name="test_store") - - -class TestContentExists: - """Tests for content_exists function.""" - - @patch("datajoint.content_registry.get_store_backend") - def test_returns_true_when_exists(self, mock_get_backend): - """Test 
that True is returned when content exists.""" - mock_backend = MagicMock() - mock_backend.exists.return_value = True - mock_get_backend.return_value = mock_backend - - content_hash = "a" * 64 - assert content_exists(content_hash, store_name="test_store") is True - - @patch("datajoint.content_registry.get_store_backend") - def test_returns_false_when_not_exists(self, mock_get_backend): - """Test that False is returned when content doesn't exist.""" - mock_backend = MagicMock() - mock_backend.exists.return_value = False - mock_get_backend.return_value = mock_backend - - content_hash = "a" * 64 - assert content_exists(content_hash, store_name="test_store") is False - - -class TestDeleteContent: - """Tests for delete_content function.""" - - @patch("datajoint.content_registry.get_store_backend") - def test_deletes_existing_content(self, mock_get_backend): - """Test deleting existing content.""" - mock_backend = MagicMock() - mock_backend.exists.return_value = True - mock_get_backend.return_value = mock_backend - - content_hash = "a" * 64 - result = delete_content(content_hash, store_name="test_store") - - assert result is True - mock_backend.remove.assert_called_once() - - @patch("datajoint.content_registry.get_store_backend") - def test_returns_false_for_nonexistent(self, mock_get_backend): - """Test that False is returned when content doesn't exist.""" - mock_backend = MagicMock() - mock_backend.exists.return_value = False - mock_get_backend.return_value = mock_backend - - content_hash = "a" * 64 - result = delete_content(content_hash, store_name="test_store") - - assert result is False - mock_backend.remove.assert_not_called() - - -class TestGetContentSize: - """Tests for get_content_size function.""" - - @patch("datajoint.content_registry.get_store_backend") - def test_returns_size(self, mock_get_backend): - """Test getting content size.""" - mock_backend = MagicMock() - mock_backend.size.return_value = 1024 - mock_get_backend.return_value = mock_backend - - content_hash = "a" * 64 - result = get_content_size(content_hash, store_name="test_store") - - assert result == 1024 diff --git a/tests/integration/test_gc.py b/tests/integration/test_gc.py index e0c5fafca..7eca79f37 100644 --- a/tests/integration/test_gc.py +++ b/tests/integration/test_gc.py @@ -10,15 +10,15 @@ from datajoint.errors import DataJointError -class TestUsesContentStorage: - """Tests for _uses_content_storage helper function.""" +class TestUsesHashStorage: + """Tests for _uses_hash_storage helper function.""" def test_returns_false_for_no_adapter(self): """Test that False is returned when attribute has no codec.""" attr = MagicMock() attr.codec = None - assert gc._uses_content_storage(attr) is False + assert gc._uses_hash_storage(attr) is False def test_returns_true_for_hash_type(self): """Test that True is returned for type.""" @@ -27,7 +27,7 @@ def test_returns_true_for_hash_type(self): attr.codec.name = "hash" attr.store = "mystore" - assert gc._uses_content_storage(attr) is True + assert gc._uses_hash_storage(attr) is True def test_returns_true_for_blob_external(self): """Test that True is returned for type (external).""" @@ -36,7 +36,7 @@ def test_returns_true_for_blob_external(self): attr.codec.name = "blob" attr.store = "mystore" - assert gc._uses_content_storage(attr) is True + assert gc._uses_hash_storage(attr) is True def test_returns_true_for_attach_external(self): """Test that True is returned for type (external).""" @@ -45,7 +45,7 @@ def test_returns_true_for_attach_external(self): attr.codec.name = "attach" 
attr.store = "mystore" - assert gc._uses_content_storage(attr) is True + assert gc._uses_hash_storage(attr) is True def test_returns_false_for_blob_internal(self): """Test that False is returned for internal storage.""" @@ -54,94 +54,102 @@ def test_returns_false_for_blob_internal(self): attr.codec.name = "blob" attr.store = None - assert gc._uses_content_storage(attr) is False + assert gc._uses_hash_storage(attr) is False -class TestExtractContentRefs: - """Tests for _extract_content_refs helper function.""" +class TestExtractHashRefs: + """Tests for _extract_hash_refs helper function.""" def test_returns_empty_for_none(self): """Test that empty list is returned for None value.""" - assert gc._extract_content_refs(None) == [] + assert gc._extract_hash_refs(None) == [] def test_parses_json_string(self): - """Test parsing JSON string with hash.""" - value = '{"hash": "abc123", "store": "mystore"}' - refs = gc._extract_content_refs(value) + """Test parsing JSON string with path.""" + value = '{"path": "_hash/schema/abc123", "hash": "abc123", "store": "mystore"}' + refs = gc._extract_hash_refs(value) assert len(refs) == 1 - assert refs[0] == ("abc123", "mystore") + assert refs[0] == ("_hash/schema/abc123", "mystore") def test_parses_dict_directly(self): - """Test parsing dict with hash.""" - value = {"hash": "def456", "store": None} - refs = gc._extract_content_refs(value) + """Test parsing dict with path.""" + value = {"path": "_hash/schema/def456", "hash": "def456", "store": None} + refs = gc._extract_hash_refs(value) assert len(refs) == 1 - assert refs[0] == ("def456", None) + assert refs[0] == ("_hash/schema/def456", None) def test_returns_empty_for_invalid_json(self): """Test that empty list is returned for invalid JSON.""" - assert gc._extract_content_refs("not json") == [] + assert gc._extract_hash_refs("not json") == [] - def test_returns_empty_for_dict_without_hash(self): - """Test that empty list is returned for dict without hash key.""" - assert gc._extract_content_refs({"other": "data"}) == [] + def test_returns_empty_for_dict_without_path(self): + """Test that empty list is returned for dict without path key.""" + assert gc._extract_hash_refs({"hash": "abc123"}) == [] -class TestUsesObjectStorage: - """Tests for _uses_object_storage helper function.""" +class TestUsesSchemaStorage: + """Tests for _uses_schema_storage helper function.""" def test_returns_false_for_no_adapter(self): """Test that False is returned when attribute has no codec.""" attr = MagicMock() attr.codec = None - assert gc._uses_object_storage(attr) is False + assert gc._uses_schema_storage(attr) is False def test_returns_true_for_object_type(self): - """Test that True is returned for type.""" + """Test that True is returned for type.""" attr = MagicMock() attr.codec = MagicMock() attr.codec.name = "object" - assert gc._uses_object_storage(attr) is True + assert gc._uses_schema_storage(attr) is True + + def test_returns_true_for_npy_type(self): + """Test that True is returned for type.""" + attr = MagicMock() + attr.codec = MagicMock() + attr.codec.name = "npy" + + assert gc._uses_schema_storage(attr) is True def test_returns_false_for_other_types(self): - """Test that False is returned for non-object types.""" + """Test that False is returned for non-schema-addressed types.""" attr = MagicMock() attr.codec = MagicMock() attr.codec.name = "blob" - assert gc._uses_object_storage(attr) is False + assert gc._uses_schema_storage(attr) is False -class TestExtractObjectRefs: - """Tests for _extract_object_refs helper 
function.""" +class TestExtractSchemaRefs: + """Tests for _extract_schema_refs helper function.""" def test_returns_empty_for_none(self): """Test that empty list is returned for None value.""" - assert gc._extract_object_refs(None) == [] + assert gc._extract_schema_refs(None) == [] def test_parses_json_string(self): """Test parsing JSON string with path.""" - value = '{"path": "schema/table/objects/pk/field_abc123", "store": "mystore"}' - refs = gc._extract_object_refs(value) + value = '{"path": "schema/table/pk/field", "store": "mystore"}' + refs = gc._extract_schema_refs(value) assert len(refs) == 1 - assert refs[0] == ("schema/table/objects/pk/field_abc123", "mystore") + assert refs[0] == ("schema/table/pk/field", "mystore") def test_parses_dict_directly(self): """Test parsing dict with path.""" value = {"path": "test/path", "store": None} - refs = gc._extract_object_refs(value) + refs = gc._extract_schema_refs(value) assert len(refs) == 1 assert refs[0] == ("test/path", None) def test_returns_empty_for_dict_without_path(self): """Test that empty list is returned for dict without path key.""" - assert gc._extract_object_refs({"other": "data"}) == [] + assert gc._extract_schema_refs({"other": "data"}) == [] class TestScan: @@ -152,46 +160,46 @@ def test_requires_at_least_one_schema(self): with pytest.raises(DataJointError, match="At least one schema must be provided"): gc.scan() - @patch("datajoint.gc.scan_object_references") - @patch("datajoint.gc.list_stored_objects") - @patch("datajoint.gc.scan_references") - @patch("datajoint.gc.list_stored_content") - def test_returns_stats(self, mock_list_content, mock_scan_refs, mock_list_objects, mock_scan_objects): + @patch("datajoint.gc.scan_schema_references") + @patch("datajoint.gc.list_schema_paths") + @patch("datajoint.gc.scan_hash_references") + @patch("datajoint.gc.list_stored_hashes") + def test_returns_stats(self, mock_list_hashes, mock_scan_hash, mock_list_schemas, mock_scan_schema): """Test that scan returns proper statistics.""" - # Mock content-addressed storage - mock_scan_refs.return_value = {"hash1", "hash2"} - mock_list_content.return_value = { - "hash1": 100, - "hash3": 200, # orphaned + # Mock hash-addressed storage (now uses paths) + mock_scan_hash.return_value = {"_hash/schema/path1", "_hash/schema/path2"} + mock_list_hashes.return_value = { + "_hash/schema/path1": 100, + "_hash/schema/path3": 200, # orphaned } - # Mock path-addressed storage - mock_scan_objects.return_value = {"path/to/obj1"} - mock_list_objects.return_value = { - "path/to/obj1": 500, - "path/to/obj2": 300, # orphaned + # Mock schema-addressed storage + mock_scan_schema.return_value = {"schema/table/pk1/field"} + mock_list_schemas.return_value = { + "schema/table/pk1/field": 500, + "schema/table/pk2/field": 300, # orphaned } mock_schema = MagicMock() stats = gc.scan(mock_schema, store_name="test_store") - # Content stats - assert stats["content_referenced"] == 2 - assert stats["content_stored"] == 2 - assert stats["content_orphaned"] == 1 - assert "hash3" in stats["orphaned_hashes"] + # Hash stats + assert stats["hash_referenced"] == 2 + assert stats["hash_stored"] == 2 + assert stats["hash_orphaned"] == 1 + assert "_hash/schema/path3" in stats["orphaned_hashes"] - # Object stats - assert stats["object_referenced"] == 1 - assert stats["object_stored"] == 2 - assert stats["object_orphaned"] == 1 - assert "path/to/obj2" in stats["orphaned_paths"] + # Schema stats + assert stats["schema_paths_referenced"] == 1 + assert stats["schema_paths_stored"] == 2 + 
assert stats["schema_paths_orphaned"] == 1 + assert "schema/table/pk2/field" in stats["orphaned_paths"] # Combined totals assert stats["referenced"] == 3 assert stats["stored"] == 4 assert stats["orphaned"] == 2 - assert stats["orphaned_bytes"] == 500 # 200 content + 300 object + assert stats["orphaned_bytes"] == 500 # 200 hash + 300 schema class TestCollect: @@ -205,10 +213,10 @@ def test_dry_run_does_not_delete(self, mock_scan): "stored": 2, "orphaned": 1, "orphaned_bytes": 100, - "orphaned_hashes": ["orphan_hash"], + "orphaned_hashes": ["_hash/schema/orphan_path"], "orphaned_paths": [], - "content_orphaned": 1, - "object_orphaned": 0, + "hash_orphaned": 1, + "schema_paths_orphaned": 0, } mock_schema = MagicMock() @@ -218,59 +226,59 @@ def test_dry_run_does_not_delete(self, mock_scan): assert stats["bytes_freed"] == 0 assert stats["dry_run"] is True - @patch("datajoint.gc.delete_content") - @patch("datajoint.gc.list_stored_content") + @patch("datajoint.gc.delete_path") + @patch("datajoint.gc.list_stored_hashes") @patch("datajoint.gc.scan") - def test_deletes_orphaned_content(self, mock_scan, mock_list_stored, mock_delete): - """Test that orphaned content is deleted when dry_run=False.""" + def test_deletes_orphaned_hashes(self, mock_scan, mock_list_stored, mock_delete): + """Test that orphaned hashes are deleted when dry_run=False.""" mock_scan.return_value = { "referenced": 1, "stored": 2, "orphaned": 1, "orphaned_bytes": 100, - "orphaned_hashes": ["orphan_hash"], + "orphaned_hashes": ["_hash/schema/orphan_path"], "orphaned_paths": [], - "content_orphaned": 1, - "object_orphaned": 0, + "hash_orphaned": 1, + "schema_paths_orphaned": 0, } - mock_list_stored.return_value = {"orphan_hash": 100} + mock_list_stored.return_value = {"_hash/schema/orphan_path": 100} mock_delete.return_value = True mock_schema = MagicMock() stats = gc.collect(mock_schema, store_name="test_store", dry_run=False) assert stats["deleted"] == 1 - assert stats["content_deleted"] == 1 + assert stats["hash_deleted"] == 1 assert stats["bytes_freed"] == 100 assert stats["dry_run"] is False - mock_delete.assert_called_once_with("orphan_hash", "test_store") + mock_delete.assert_called_once_with("_hash/schema/orphan_path", "test_store") - @patch("datajoint.gc.delete_object") - @patch("datajoint.gc.list_stored_objects") + @patch("datajoint.gc.delete_schema_path") + @patch("datajoint.gc.list_schema_paths") @patch("datajoint.gc.scan") - def test_deletes_orphaned_objects(self, mock_scan, mock_list_objects, mock_delete): - """Test that orphaned objects are deleted when dry_run=False.""" + def test_deletes_orphaned_schemas(self, mock_scan, mock_list_schemas, mock_delete): + """Test that orphaned schema paths are deleted when dry_run=False.""" mock_scan.return_value = { "referenced": 1, "stored": 2, "orphaned": 1, "orphaned_bytes": 500, "orphaned_hashes": [], - "orphaned_paths": ["path/to/orphan"], - "content_orphaned": 0, - "object_orphaned": 1, + "orphaned_paths": ["schema/table/pk/field"], + "hash_orphaned": 0, + "schema_paths_orphaned": 1, } - mock_list_objects.return_value = {"path/to/orphan": 500} + mock_list_schemas.return_value = {"schema/table/pk/field": 500} mock_delete.return_value = True mock_schema = MagicMock() stats = gc.collect(mock_schema, store_name="test_store", dry_run=False) assert stats["deleted"] == 1 - assert stats["object_deleted"] == 1 + assert stats["schema_paths_deleted"] == 1 assert stats["bytes_freed"] == 500 assert stats["dry_run"] is False - mock_delete.assert_called_once_with("path/to/orphan", 
"test_store") + mock_delete.assert_called_once_with("schema/table/pk/field", "test_store") class TestFormatStats: @@ -283,14 +291,14 @@ def test_formats_scan_stats(self): "stored": 15, "orphaned": 5, "orphaned_bytes": 1024 * 1024, # 1 MB - "content_referenced": 6, - "content_stored": 8, - "content_orphaned": 2, - "content_orphaned_bytes": 512 * 1024, - "object_referenced": 4, - "object_stored": 7, - "object_orphaned": 3, - "object_orphaned_bytes": 512 * 1024, + "hash_referenced": 6, + "hash_stored": 8, + "hash_orphaned": 2, + "hash_orphaned_bytes": 512 * 1024, + "schema_paths_referenced": 4, + "schema_paths_stored": 7, + "schema_paths_orphaned": 3, + "schema_paths_orphaned_bytes": 512 * 1024, } result = gc.format_stats(stats) @@ -300,8 +308,8 @@ def test_formats_scan_stats(self): assert "Orphaned (unreferenced): 5" in result assert "1.00 MB" in result # Check for detailed sections - assert "Content-Addressed Storage" in result - assert "Path-Addressed Storage" in result + assert "Hash-Addressed Storage" in result + assert "Schema-Addressed Storage" in result def test_formats_collect_stats_dry_run(self): """Test formatting collect statistics with dry_run.""" @@ -325,8 +333,8 @@ def test_formats_collect_stats_actual(self): "stored": 15, "orphaned": 5, "deleted": 3, - "content_deleted": 2, - "object_deleted": 1, + "hash_deleted": 2, + "schema_paths_deleted": 1, "bytes_freed": 2 * 1024 * 1024, # 2 MB "errors": 2, "dry_run": False, @@ -335,7 +343,7 @@ def test_formats_collect_stats_actual(self): result = gc.format_stats(stats) assert "Deleted: 3" in result - assert "Content: 2" in result - assert "Objects: 1" in result + assert "Hash items: 2" in result + assert "Schema paths: 1" in result assert "2.00 MB" in result assert "Errors: 2" in result diff --git a/tests/integration/test_hash_storage.py b/tests/integration/test_hash_storage.py new file mode 100644 index 000000000..8cd8b5b93 --- /dev/null +++ b/tests/integration/test_hash_storage.py @@ -0,0 +1,304 @@ +""" +Tests for hash-addressed storage (hash_registry.py). +""" + +import re +from unittest.mock import MagicMock, patch + +import pytest + +from datajoint.hash_registry import ( + build_hash_path, + compute_hash, + hash_exists, + delete_path, + delete_hash, + get_hash, + get_size, + get_hash_size, + put_hash, +) +from datajoint.errors import DataJointError + + +# Base32 pattern for validation (26 lowercase alphanumeric chars) +BASE32_PATTERN = re.compile(r"^[a-z2-7]{26}$") + + +class TestComputeHash: + """Tests for compute_hash function.""" + + def test_returns_base32_format(self): + """Test that hash is returned as Base32 string.""" + data = b"Hello, World!" 
+ result = compute_hash(data) + + # Should be valid Base32 format (26 lowercase chars) + assert len(result) == 26 + assert BASE32_PATTERN.match(result) + + def test_empty_bytes(self): + """Test hashing empty bytes.""" + result = compute_hash(b"") + assert BASE32_PATTERN.match(result) + + def test_different_content_different_hash(self): + """Test that different content produces different hashes.""" + hash1 = compute_hash(b"content1") + hash2 = compute_hash(b"content2") + assert hash1 != hash2 + + def test_same_content_same_hash(self): + """Test that same content produces same hash.""" + data = b"identical content" + hash1 = compute_hash(data) + hash2 = compute_hash(data) + assert hash1 == hash2 + + +class TestBuildHashPath: + """Tests for build_hash_path function.""" + + def test_builds_flat_path(self): + """Test that path is built as _hash/{schema}/{hash}.""" + test_hash = "abcdefghijklmnopqrstuvwxyz"[:26] # 26 char base32 + result = build_hash_path(test_hash, "my_schema") + + assert result == f"_hash/my_schema/{test_hash}" + + def test_builds_subfolded_path(self): + """Test path with subfolding.""" + test_hash = "abcdefghijklmnopqrstuvwxyz"[:26] + result = build_hash_path(test_hash, "my_schema", subfolding=(2, 2)) + + assert result == f"_hash/my_schema/ab/cd/{test_hash}" + + def test_rejects_invalid_hash(self): + """Test that invalid hash raises error.""" + with pytest.raises(DataJointError, match="Invalid content hash"): + build_hash_path("not-a-hash", "my_schema") + + with pytest.raises(DataJointError, match="Invalid content hash"): + build_hash_path("a" * 64, "my_schema") # Too long + + with pytest.raises(DataJointError, match="Invalid content hash"): + build_hash_path("ABCDEFGHIJKLMNOPQRSTUVWXYZ"[:26], "my_schema") # Uppercase + + def test_real_hash_path(self): + """Test path building with a real computed hash.""" + data = b"test content" + content_hash = compute_hash(data) + path = build_hash_path(content_hash, "test_schema") + + # Verify structure: _hash/{schema}/{hash} + parts = path.split("/") + assert len(parts) == 3 + assert parts[0] == "_hash" + assert parts[1] == "test_schema" + assert parts[2] == content_hash + assert BASE32_PATTERN.match(parts[2]) + + +class TestPutHash: + """Tests for put_hash function.""" + + @patch("datajoint.hash_registry.get_store_subfolding") + @patch("datajoint.hash_registry.get_store_backend") + def test_stores_new_content(self, mock_get_backend, mock_get_subfolding): + """Test storing new content.""" + mock_backend = MagicMock() + mock_backend.exists.return_value = False + mock_get_backend.return_value = mock_backend + mock_get_subfolding.return_value = None + + data = b"new content" + result = put_hash(data, schema_name="test_schema", store_name="test_store") + + # Verify return value includes hash and path + assert "hash" in result + assert "path" in result + assert result["hash"] == compute_hash(data) + assert result["path"] == f"_hash/test_schema/{result['hash']}" + assert result["schema"] == "test_schema" + assert result["store"] == "test_store" + assert result["size"] == len(data) + + # Verify backend was called + mock_backend.put_buffer.assert_called_once() + + @patch("datajoint.hash_registry.get_store_subfolding") + @patch("datajoint.hash_registry.get_store_backend") + def test_deduplicates_existing_content(self, mock_get_backend, mock_get_subfolding): + """Test that existing content is not re-uploaded.""" + mock_backend = MagicMock() + mock_backend.exists.return_value = True # Content already exists + mock_get_backend.return_value = 
mock_backend + mock_get_subfolding.return_value = None + + data = b"existing content" + result = put_hash(data, schema_name="test_schema", store_name="test_store") + + # Verify return value is still correct + assert result["hash"] == compute_hash(data) + assert "path" in result + assert result["schema"] == "test_schema" + assert result["size"] == len(data) + + # Verify put_buffer was NOT called (deduplication) + mock_backend.put_buffer.assert_not_called() + + +class TestGetHash: + """Tests for get_hash function.""" + + @patch("datajoint.hash_registry.get_store_backend") + def test_retrieves_content(self, mock_get_backend): + """Test retrieving content using metadata.""" + data = b"stored content" + content_hash = compute_hash(data) + + mock_backend = MagicMock() + mock_backend.get_buffer.return_value = data + mock_get_backend.return_value = mock_backend + + metadata = { + "hash": content_hash, + "path": f"_hash/test_schema/{content_hash}", + "store": "test_store", + } + result = get_hash(metadata) + + assert result == data + mock_backend.get_buffer.assert_called_once_with(metadata["path"]) + + @patch("datajoint.hash_registry.get_store_backend") + def test_verifies_hash(self, mock_get_backend): + """Test that hash is verified on retrieval.""" + data = b"original content" + content_hash = compute_hash(data) + + # Return corrupted data + mock_backend = MagicMock() + mock_backend.get_buffer.return_value = b"corrupted content" + mock_get_backend.return_value = mock_backend + + metadata = { + "hash": content_hash, + "path": f"_hash/test_schema/{content_hash}", + "store": "test_store", + } + + with pytest.raises(DataJointError, match="Hash mismatch"): + get_hash(metadata) + + +class TestHashExists: + """Tests for hash_exists function.""" + + @patch("datajoint.hash_registry.get_store_subfolding") + @patch("datajoint.hash_registry.get_store_backend") + def test_returns_true_when_exists(self, mock_get_backend, mock_get_subfolding): + """Test that True is returned when content exists.""" + mock_backend = MagicMock() + mock_backend.exists.return_value = True + mock_get_backend.return_value = mock_backend + mock_get_subfolding.return_value = None + + content_hash = "abcdefghijklmnopqrstuvwxyz"[:26] # Valid base32 + assert hash_exists(content_hash, schema_name="test_schema", store_name="test_store") is True + + @patch("datajoint.hash_registry.get_store_subfolding") + @patch("datajoint.hash_registry.get_store_backend") + def test_returns_false_when_not_exists(self, mock_get_backend, mock_get_subfolding): + """Test that False is returned when content doesn't exist.""" + mock_backend = MagicMock() + mock_backend.exists.return_value = False + mock_get_backend.return_value = mock_backend + mock_get_subfolding.return_value = None + + content_hash = "abcdefghijklmnopqrstuvwxyz"[:26] # Valid base32 + assert hash_exists(content_hash, schema_name="test_schema", store_name="test_store") is False + + +class TestDeletePath: + """Tests for delete_path function.""" + + @patch("datajoint.hash_registry.get_store_backend") + def test_deletes_existing_content(self, mock_get_backend): + """Test deleting existing content by path.""" + mock_backend = MagicMock() + mock_backend.exists.return_value = True + mock_get_backend.return_value = mock_backend + + path = "_hash/test_schema/abcdefghijklmnopqrst" + result = delete_path(path, store_name="test_store") + + assert result is True + mock_backend.remove.assert_called_once_with(path) + + @patch("datajoint.hash_registry.get_store_backend") + def 
test_returns_false_for_nonexistent(self, mock_get_backend): + """Test that False is returned when content doesn't exist.""" + mock_backend = MagicMock() + mock_backend.exists.return_value = False + mock_get_backend.return_value = mock_backend + + path = "_hash/test_schema/abcdefghijklmnopqrst" + result = delete_path(path, store_name="test_store") + + assert result is False + mock_backend.remove.assert_not_called() + + +class TestDeleteHash: + """Tests for delete_hash function (backward compatibility).""" + + @patch("datajoint.hash_registry.get_store_subfolding") + @patch("datajoint.hash_registry.get_store_backend") + def test_deletes_existing_content(self, mock_get_backend, mock_get_subfolding): + """Test deleting existing content by hash.""" + mock_backend = MagicMock() + mock_backend.exists.return_value = True + mock_get_backend.return_value = mock_backend + mock_get_subfolding.return_value = None + + content_hash = "abcdefghijklmnopqrstuvwxyz"[:26] # Valid base32 + result = delete_hash(content_hash, schema_name="test_schema", store_name="test_store") + + assert result is True + mock_backend.remove.assert_called_once() + + +class TestGetSize: + """Tests for get_size function.""" + + @patch("datajoint.hash_registry.get_store_backend") + def test_returns_size(self, mock_get_backend): + """Test getting content size by path.""" + mock_backend = MagicMock() + mock_backend.size.return_value = 1024 + mock_get_backend.return_value = mock_backend + + path = "_hash/test_schema/abcdefghijklmnopqrst" + result = get_size(path, store_name="test_store") + + assert result == 1024 + mock_backend.size.assert_called_once_with(path) + + +class TestGetHashSize: + """Tests for get_hash_size function (backward compatibility).""" + + @patch("datajoint.hash_registry.get_store_subfolding") + @patch("datajoint.hash_registry.get_store_backend") + def test_returns_size(self, mock_get_backend, mock_get_subfolding): + """Test getting content size by hash.""" + mock_backend = MagicMock() + mock_backend.size.return_value = 1024 + mock_get_backend.return_value = mock_backend + mock_get_subfolding.return_value = None + + content_hash = "abcdefghijklmnopqrstuvwxyz"[:26] # Valid base32 + result = get_hash_size(content_hash, schema_name="test_schema", store_name="test_store") + + assert result == 1024 From d2ab4de23b4f8febfd72ee07aa3084f02b36b7ef Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 13 Jan 2026 14:05:24 -0600 Subject: [PATCH 09/10] refactor: Remove uuid_from_buffer, use hashlib directly for query cache - Remove uuid_from_buffer from hash.py (dead code) - connection.py now uses hashlib.md5().hexdigest() directly - Update test_hash.py to test key_hash instead Co-Authored-By: Claude Opus 4.5 --- src/datajoint/connection.py | 4 ++-- src/datajoint/hash.py | 14 -------------- tests/unit/test_hash.py | 8 +++++--- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/src/datajoint/connection.py b/src/datajoint/connection.py index 57301c2f3..219e97d98 100644 --- a/src/datajoint/connection.py +++ b/src/datajoint/connection.py @@ -5,6 +5,7 @@ from __future__ import annotations +import hashlib import logging import pathlib import re @@ -18,7 +19,6 @@ from . 
import errors from .blob import pack, unpack from .dependencies import Dependencies -from .hash import uuid_from_buffer from .settings import config from .version import __version__ @@ -418,7 +418,7 @@ def query( if use_query_cache: if not config[cache_key]: raise errors.DataJointError(f"Provide filepath dj.config['{cache_key}'] when using query caching.") - hash_ = uuid_from_buffer((str(self._query_cache) + re.sub(r"`\$\w+`", "", query)).encode() + pack(args)) + hash_ = hashlib.md5((str(self._query_cache) + re.sub(r"`\$\w+`", "", query)).encode() + pack(args)).hexdigest() cache_path = pathlib.Path(config[cache_key]) / str(hash_) try: buffer = cache_path.read_bytes() diff --git a/src/datajoint/hash.py b/src/datajoint/hash.py index 2a58e9bf4..58f87b88e 100644 --- a/src/datajoint/hash.py +++ b/src/datajoint/hash.py @@ -1,7 +1,6 @@ from __future__ import annotations import hashlib -import uuid from typing import Any @@ -14,16 +13,3 @@ def key_hash(mapping: dict[str, Any]) -> str: for k, v in sorted(mapping.items()): hashed.update(str(v).encode()) return hashed.hexdigest() - - -def uuid_from_buffer(buffer: bytes = b"", *, init_string: str = "") -> uuid.UUID: - """ - Compute MD5 hash of buffer data, returned as UUID. - - :param buffer: bytes to hash - :param init_string: string to initialize the checksum (for namespacing) - :return: UUID based on MD5 digest - """ - hashed = hashlib.md5(init_string.encode()) - hashed.update(buffer) - return uuid.UUID(bytes=hashed.digest()) diff --git a/tests/unit/test_hash.py b/tests/unit/test_hash.py index a88c45316..125ab4dbe 100644 --- a/tests/unit/test_hash.py +++ b/tests/unit/test_hash.py @@ -1,6 +1,8 @@ from datajoint import hash -def test_hash(): - assert hash.uuid_from_buffer(b"abc").hex == "900150983cd24fb0d6963f7d28e17f72" - assert hash.uuid_from_buffer(b"").hex == "d41d8cd98f00b204e9800998ecf8427e" +def test_key_hash(): + """Test that key_hash produces consistent MD5 hex digests.""" + assert hash.key_hash({"a": 1, "b": 2}) == hash.key_hash({"b": 2, "a": 1}) + assert hash.key_hash({"x": "hello"}) == "5d41402abc4b2a76b9719d911017c592" + assert hash.key_hash({}) == "d41d8cd98f00b204e9800998ecf8427e" From 58f2b67a2b8271e8a86076461c0c4c100ddce832 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 13 Jan 2026 14:10:47 -0600 Subject: [PATCH 10/10] refactor: Remove unused functions from hash_registry Remove dead code that was only tested but never used in production: - hash_exists (gc uses set operations on paths) - delete_hash (gc uses delete_path directly) - get_size (gc collects sizes during walk) - get_hash_size (wrapper for get_size) Remaining API: compute_hash, build_hash_path, get_store_backend, get_store_subfolding, put_hash, get_hash, delete_path Co-Authored-By: Claude Opus 4.5 --- src/datajoint/hash_registry.py | 113 ------------------------- tests/integration/test_hash_storage.py | 86 ------------------- 2 files changed, 199 deletions(-) diff --git a/src/datajoint/hash_registry.py b/src/datajoint/hash_registry.py index 7b286e874..5033f13e5 100644 --- a/src/datajoint/hash_registry.py +++ b/src/datajoint/hash_registry.py @@ -267,34 +267,6 @@ def get_hash(metadata: dict[str, Any]) -> bytes: return data -def hash_exists( - content_hash: str, - schema_name: str, - store_name: str | None = None, -) -> bool: - """ - Check if hash-addressed content exists in storage. - - Parameters - ---------- - content_hash : str - Base32-encoded hash (26 characters). - schema_name : str - Database/schema name for path isolation. 
- store_name : str, optional - Name of the store. If None, uses default store. - - Returns - ------- - bool - True if content exists. - """ - subfolding = get_store_subfolding(store_name) - path = build_hash_path(content_hash, schema_name, subfolding) - backend = get_store_backend(store_name) - return backend.exists(path) - - def delete_path( path: str, store_name: str | None = None, @@ -328,88 +300,3 @@ def delete_path( logger.debug(f"Deleted: {path}") return True return False - - -# Backward compatibility alias -def delete_hash( - content_hash: str, - schema_name: str, - store_name: str | None = None, -) -> bool: - """ - Delete hash-addressed content from storage (deprecated). - - .. deprecated:: - Use :func:`delete_path` with the stored path instead. - - Parameters - ---------- - content_hash : str - Base32-encoded hash (26 characters). - schema_name : str - Database/schema name for path isolation. - store_name : str, optional - Name of the store. If None, uses default store. - - Returns - ------- - bool - True if content was deleted, False if it didn't exist. - """ - subfolding = get_store_subfolding(store_name) - path = build_hash_path(content_hash, schema_name, subfolding) - return delete_path(path, store_name) - - -def get_size( - path: str, - store_name: str | None = None, -) -> int: - """ - Get the size of content at the specified path. - - Parameters - ---------- - path : str - Storage path (as stored in metadata). - store_name : str, optional - Name of the store. If None, uses default store. - - Returns - ------- - int - Size in bytes. - """ - backend = get_store_backend(store_name) - return backend.size(path) - - -# Backward compatibility alias -def get_hash_size( - content_hash: str, - schema_name: str, - store_name: str | None = None, -) -> int: - """ - Get the size of hash-addressed content (deprecated). - - .. deprecated:: - Use :func:`get_size` with the stored path instead. - - Parameters - ---------- - content_hash : str - Base32-encoded hash (26 characters). - schema_name : str - Database/schema name for path isolation. - store_name : str, optional - Name of the store. If None, uses default store. - - Returns - ------- - int - Size in bytes. 
- """ - subfolding = get_store_subfolding(store_name) - path = build_hash_path(content_hash, schema_name, subfolding) - return get_size(path, store_name) diff --git a/tests/integration/test_hash_storage.py b/tests/integration/test_hash_storage.py index 8cd8b5b93..bc1c61a4d 100644 --- a/tests/integration/test_hash_storage.py +++ b/tests/integration/test_hash_storage.py @@ -10,12 +10,8 @@ from datajoint.hash_registry import ( build_hash_path, compute_hash, - hash_exists, delete_path, - delete_hash, get_hash, - get_size, - get_hash_size, put_hash, ) from datajoint.errors import DataJointError @@ -192,34 +188,6 @@ def test_verifies_hash(self, mock_get_backend): get_hash(metadata) -class TestHashExists: - """Tests for hash_exists function.""" - - @patch("datajoint.hash_registry.get_store_subfolding") - @patch("datajoint.hash_registry.get_store_backend") - def test_returns_true_when_exists(self, mock_get_backend, mock_get_subfolding): - """Test that True is returned when content exists.""" - mock_backend = MagicMock() - mock_backend.exists.return_value = True - mock_get_backend.return_value = mock_backend - mock_get_subfolding.return_value = None - - content_hash = "abcdefghijklmnopqrstuvwxyz"[:26] # Valid base32 - assert hash_exists(content_hash, schema_name="test_schema", store_name="test_store") is True - - @patch("datajoint.hash_registry.get_store_subfolding") - @patch("datajoint.hash_registry.get_store_backend") - def test_returns_false_when_not_exists(self, mock_get_backend, mock_get_subfolding): - """Test that False is returned when content doesn't exist.""" - mock_backend = MagicMock() - mock_backend.exists.return_value = False - mock_get_backend.return_value = mock_backend - mock_get_subfolding.return_value = None - - content_hash = "abcdefghijklmnopqrstuvwxyz"[:26] # Valid base32 - assert hash_exists(content_hash, schema_name="test_schema", store_name="test_store") is False - - class TestDeletePath: """Tests for delete_path function.""" @@ -248,57 +216,3 @@ def test_returns_false_for_nonexistent(self, mock_get_backend): assert result is False mock_backend.remove.assert_not_called() - - -class TestDeleteHash: - """Tests for delete_hash function (backward compatibility).""" - - @patch("datajoint.hash_registry.get_store_subfolding") - @patch("datajoint.hash_registry.get_store_backend") - def test_deletes_existing_content(self, mock_get_backend, mock_get_subfolding): - """Test deleting existing content by hash.""" - mock_backend = MagicMock() - mock_backend.exists.return_value = True - mock_get_backend.return_value = mock_backend - mock_get_subfolding.return_value = None - - content_hash = "abcdefghijklmnopqrstuvwxyz"[:26] # Valid base32 - result = delete_hash(content_hash, schema_name="test_schema", store_name="test_store") - - assert result is True - mock_backend.remove.assert_called_once() - - -class TestGetSize: - """Tests for get_size function.""" - - @patch("datajoint.hash_registry.get_store_backend") - def test_returns_size(self, mock_get_backend): - """Test getting content size by path.""" - mock_backend = MagicMock() - mock_backend.size.return_value = 1024 - mock_get_backend.return_value = mock_backend - - path = "_hash/test_schema/abcdefghijklmnopqrst" - result = get_size(path, store_name="test_store") - - assert result == 1024 - mock_backend.size.assert_called_once_with(path) - - -class TestGetHashSize: - """Tests for get_hash_size function (backward compatibility).""" - - @patch("datajoint.hash_registry.get_store_subfolding") - 
@patch("datajoint.hash_registry.get_store_backend") - def test_returns_size(self, mock_get_backend, mock_get_subfolding): - """Test getting content size by hash.""" - mock_backend = MagicMock() - mock_backend.size.return_value = 1024 - mock_get_backend.return_value = mock_backend - mock_get_subfolding.return_value = None - - content_hash = "abcdefghijklmnopqrstuvwxyz"[:26] # Valid base32 - result = get_hash_size(content_hash, schema_name="test_schema", store_name="test_store") - - assert result == 1024