diff --git a/mkdocs.yaml b/mkdocs.yaml index 717664c1..89e7ca9c 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -10,8 +10,6 @@ nav: - explanation/index.md - Overview: - Data Pipelines: explanation/data-pipelines.md - - What's New in 2.0: explanation/whats-new-2.md - - What's New in 2.2: explanation/whats-new-22.md - FAQ: explanation/faq.md - Data Model: - Relational Workflow Model: explanation/relational-workflow-model.md @@ -127,6 +125,9 @@ nav: - API: api/ # Auto-generated via gen-files + literate-nav - About: - about/index.md + - What's New in 2.2: about/whats-new-22.md + - What's New in 2.1: about/whats-new-21.md + - What's New in 2.0: about/whats-new-2.md - History: about/history.md - Documentation Versioning: about/versioning.md - Platform: https://www.datajoint.com/sign-up diff --git a/src/.overrides/partials/announce.html b/src/.overrides/partials/announce.html index d45cdea3..7f19c3b2 100644 --- a/src/.overrides/partials/announce.html +++ b/src/.overrides/partials/announce.html @@ -1,5 +1,5 @@ {% if config.extra.datajoint_version %} - + Documentation for DataJoint {{ config.extra.datajoint_version }} {% endif %} diff --git a/src/about/versioning.md b/src/about/versioning.md index 972f8391..365618c2 100644 --- a/src/about/versioning.md +++ b/src/about/versioning.md @@ -87,7 +87,7 @@ print(dj.__version__) If you're upgrading from legacy DataJoint (pre-2.0): -1. **Review** the [What's New in 2.0](../explanation/whats-new-2.md) page to understand major changes +1. **Review** the [What's New in 2.0](whats-new-2.md) page to understand major changes 2. **Follow** the [Migration Guide](../how-to/migrate-to-v20.md) for step-by-step upgrade instructions 3. 
**Reference** this documentation for updated syntax and APIs diff --git a/src/explanation/whats-new-2.md b/src/about/whats-new-2.md similarity index 92% rename from src/explanation/whats-new-2.md rename to src/about/whats-new-2.md index bb63e57b..29273512 100644 --- a/src/explanation/whats-new-2.md +++ b/src/about/whats-new-2.md @@ -274,20 +274,12 @@ Most users complete Phases 1-2 in a single session. Phases 3-4 only apply if you ## See Also -### Migration -- **[Migration Guide](../how-to/migrate-to-v20.md/)** — Complete upgrade instructions -- [Configuration](../how-to/configure-database.md/) — Setup new configuration system - -### Core Concepts -- [Type System](type-system.md) — Understand the three-tier type architecture -- [Computation Model](computation-model.md) — Jobs 2.0 and AutoPopulate -- [Query Algebra](query-algebra.md) — Semantic matching and operators - -### Getting Started -- [Installation](../how-to/installation.md/) — Install DataJoint 2.0 -- [Tutorials](../tutorials/index.md/) — Learn by example - -### Reference -- [Type System Specification](../reference/specs/type-system.md/) — Complete type system details -- [Codec API](../reference/specs/codec-api.md/) — Build custom codecs -- [AutoPopulate Specification](../reference/specs/autopopulate.md/) — Jobs 2.0 reference +- [What's New in 2.1](whats-new-21.md) — Next release +- [Release Notes (v2.0.0)](https://github.com/datajoint/datajoint-python/releases/tag/v2.0.0) — GitHub changelog +- **[Migration Guide](../how-to/migrate-to-v20.md)** — Complete upgrade instructions +- [Configuration](../how-to/configure-database.md) — Set up the new configuration system +- [Type System](../explanation/type-system.md) — Understand the three-tier type architecture +- [Computation Model](../explanation/computation-model.md) — Jobs 2.0 and AutoPopulate +- [Query Algebra](../explanation/query-algebra.md) — Semantic matching and operators +- [Installation](../how-to/installation.md) — Install DataJoint 2.0 +- 
[Tutorials](../tutorials/index.md) — Learn by example diff --git a/src/about/whats-new-21.md b/src/about/whats-new-21.md new file mode 100644 index 00000000..0ef673ea --- /dev/null +++ b/src/about/whats-new-21.md @@ -0,0 +1,125 @@ +# What's New in DataJoint 2.1 + +DataJoint 2.1 adds **PostgreSQL as a production backend**, **enhanced diagram visualization**, and **singleton tables**. + +> **Upgrading from 2.0?** No breaking changes. All existing code continues to work. New features are purely additive. + +> **Citation:** Yatsenko D, Nguyen TT. *DataJoint 2.0: A Computational Substrate for Agentic Scientific Workflows.* arXiv:2602.16585. 2026. [doi:10.48550/arXiv.2602.16585](https://doi.org/10.48550/arXiv.2602.16585) + +## PostgreSQL Backend + +DataJoint now supports PostgreSQL 15+ as a production database backend alongside MySQL 8+. The adapter architecture generates backend-specific SQL while maintaining a consistent API — the same table definitions, queries, and pipeline logic work on both backends. + +```bash +export DJ_BACKEND=postgresql +export DJ_HOST=localhost +export DJ_PORT=5432 +``` + +Or configure programmatically: + +```python +dj.config['database.backend'] = 'postgresql' +``` + +All core types (`int32`, `float64`, `varchar`, `uuid`, `json`), codec types (`<blob>`, `<attach>`, `<object@>`), query operations, foreign keys, indexes, and auto-populate work identically across backends. Backend-specific differences are handled internally by the adapter layer. + +See [Database Backends](../reference/specs/database-backends.md) for the full specification. + +## Diagram Enhancements + +`dj.Diagram` gains several visualization features for working with complex, multi-schema pipelines. 
+ +### Layout Direction + +Control the flow direction of diagrams: + +```python +# Horizontal layout +dj.config.display.diagram_direction = "LR" + +# Or temporarily +with dj.config.override(display__diagram_direction="LR"): + dj.Diagram(schema).draw() +``` + +| Value | Description | +|-------|-------------| +| `"TB"` | Top to bottom (default) | +| `"LR"` | Left to right | + +### Mermaid Output + +Generate [Mermaid](https://mermaid.js.org/) syntax for embedding diagrams in Markdown, GitHub, or web documentation: + +```python +print(dj.Diagram(schema).make_mermaid()) +``` + +Save directly to `.mmd` or `.mermaid` files: + +```python +dj.Diagram(schema).save("pipeline.mmd") +``` + +### Schema Grouping + +Multi-schema diagrams automatically group tables into visual clusters by database schema. The cluster label shows the Python module name when available, following the DataJoint convention of one module per schema. + +```python +combined = dj.Diagram(schema1) + dj.Diagram(schema2) +combined.draw() # tables grouped by schema +``` + +### Collapsing Schemas + +For high-level pipeline views, collapse entire schemas into single nodes: + +```python +# Show schema1 expanded, schema2 as a single node with table count +dj.Diagram(schema1) + dj.Diagram(schema2).collapse() +``` + +The **"expanded wins" rule** applies: if a table appears in both a collapsed and non-collapsed diagram, it stays expanded. This allows showing specific tables while collapsing the rest: + +```python +# Subject is expanded, rest of analysis schema is collapsed +dj.Diagram(Subject) + dj.Diagram(analysis).collapse() +``` + +See [Diagram Specification](../reference/specs/diagram.md) for the full reference. + +## Singleton Tables + +A **singleton table** holds at most one row. 
Declare it with no attributes in the primary key section: + +```python +@schema +class Config(dj.Lookup): + definition = """ + # Global configuration + --- + setting1 : varchar(100) + setting2 : int32 + """ +``` + +| Operation | Result | +|-----------|--------| +| Insert | Works without specifying a key | +| Second insert | Raises `DuplicateError` | +| `fetch1()` | Returns the single row | + +Useful for global configuration, pipeline parameters, and summary statistics. + +See [Table Declaration](../reference/specs/table-declaration.md#25-singleton-tables-empty-primary-keys) for details. + +## See Also + +- [Database Backends](../reference/specs/database-backends.md) — Full backend specification +- [Diagram Specification](../reference/specs/diagram.md) — Diagram reference +- [Table Declaration](../reference/specs/table-declaration.md) — Singleton tables +- [Configure Database](../how-to/configure-database.md) — Connection setup for both backends +- [What's New in 2.0](whats-new-2.md) — Previous release +- [What's New in 2.2](whats-new-22.md) — Next release +- [Release Notes (v2.1.0)](https://github.com/datajoint/datajoint-python/releases/tag/v2.1.0) — GitHub changelog diff --git a/src/explanation/whats-new-22.md b/src/about/whats-new-22.md similarity index 78% rename from src/explanation/whats-new-22.md rename to src/about/whats-new-22.md index f33abb1d..9ca917fa 100644 --- a/src/explanation/whats-new-22.md +++ b/src/about/whats-new-22.md @@ -213,7 +213,7 @@ In prior versions, `dj.Diagram` existed solely for visualization — drawing the - **PostgreSQL** aborts the entire transaction on any error, requiring `SAVEPOINT` / `ROLLBACK TO SAVEPOINT` round-trips for each failed delete attempt. - **Fragile error parsing** across MySQL versions and privilege levels, where different configurations produce different error message formats. 
-In 2.2, `Table.delete()` and `Table.drop()` use `dj.Diagram` internally to compute the dependency graph and walk it in reverse topological order — deleting leaves first, with no trial-and-error needed. The user-facing behavior of `Table.delete()` is unchanged. The Diagram's `cascade()` and `preview()` methods are available as a public inspection API for understanding cascade impact before executing. +In 2.2, `Table.delete()` and `Table.drop()` use `dj.Diagram` internally to compute the dependency graph and walk it in reverse topological order — deleting leaves first, with no trial-and-error needed. The user-facing behavior of `Table.delete()` is unchanged. The Diagram's `cascade()` and `counts()` methods are available as a public inspection API for understanding cascade impact before executing. ### The Preview-Then-Execute Pattern @@ -225,7 +225,7 @@ diag = dj.Diagram(schema) restricted = diag.cascade(Session & {'subject_id': 'M001'}) # Inspect: what tables and how many rows would be affected? -counts = restricted.preview() +counts = restricted.counts() # {'`lab`.`session`': 3, '`lab`.`trial`': 45, '`lab`.`processed_data`': 45} # Execute via Table.delete() after reviewing the blast radius @@ -238,11 +238,11 @@ This is valuable when working with unfamiliar pipelines, large datasets, or mult The diagram supports two restriction propagation modes designed for fundamentally different tasks. -**`cascade()` prepares a delete.** It takes a single restricted table expression, propagates the restriction downstream through all descendants, and **trims the diagram** to the resulting subgraph — ancestors and unrelated tables are removed entirely. Convergence uses OR: a descendant row is marked for deletion if *any* ancestor path reaches it, because if any reason exists to remove a row, it should be removed. `cascade()` is one-shot and is always followed by `preview()` or `delete()`. 
+**`cascade()` prepares a delete.** It takes a single restricted table expression, propagates the restriction downstream through all descendants, and **trims the diagram** to the resulting subgraph — ancestors and unrelated tables are removed entirely. Convergence uses OR: a descendant row is marked for deletion if *any* ancestor path reaches it, because if any reason exists to remove a row, it should be removed. `cascade()` is one-shot and is always followed by `counts()` or `delete()`. When the cascade encounters a part table whose master is not yet included in the cascade, the behavior depends on the `part_integrity` setting. With `"enforce"` (the default), `delete()` raises an error if part rows would be deleted without their master — preventing orphaned master rows. With `"cascade"`, the restriction propagates *upward* from the part to its master: the restricted part rows identify which master rows are affected, those masters receive a restriction, and that restriction then propagates back downstream to all sibling parts — deleting the entire compositional unit, not just the originally matched part rows. -**`restrict()` selects a data subset.** It propagates a restriction downstream but **preserves the full diagram**, allowing `restrict()` to be called again from a different seed table. This makes it possible to build up multi-condition subsets incrementally — for example, restricting by species from one table and by date from another. Convergence uses AND: a descendant row is included only if *all* restricted ancestors match, because an export should contain only rows satisfying every condition. After chaining restrictions, use `prune()` to remove empty tables and `preview()` to inspect the result. +**`restrict()` selects a data subset.** It propagates a restriction downstream but **preserves the full diagram**, allowing `restrict()` to be called again from a different seed table. 
This makes it possible to build up multi-condition subsets incrementally — for example, restricting by species from one table and by date from another. Convergence uses AND: a descendant row is included only if *all* restricted ancestors match, because an export should contain only rows satisfying every condition. After chaining restrictions, use `prune()` to remove empty tables and `counts()` to inspect the result. The two modes are mutually exclusive on the same diagram — DataJoint raises an error if you attempt to mix `cascade()` and `restrict()`, or if you call `cascade()` more than once. This prevents accidental mixing of incompatible semantics: a delete diagram should never be reused for subsetting, and vice versa. @@ -256,15 +256,63 @@ export = (dj.Diagram(schema) .restrict(Session & 'session_date > "2024-01-01"') .prune()) -export.preview() # only tables with matching rows +export.counts() # only tables with matching rows export # visualize the export subgraph ``` Without prior restrictions, `prune()` removes physically empty tables. This is useful for understanding which parts of a pipeline are populated. 
+### Restriction Propagation Rules + +When `cascade()` or `restrict()` propagates a restriction from a parent to a child, one of three rules applies depending on the foreign key relationship: + +| Rule | Condition | Child restriction | +|------|-----------|-------------------| +| **Direct copy** | Non-aliased FK, restriction attributes are a subset of child's primary key | Restriction copied directly | +| **Aliased projection** | FK uses attribute renaming (e.g., `subject_id` → `animal_id`) | Parent projected with attribute mapping | +| **Full projection** | Non-aliased FK, restriction uses attributes not in child's primary key | Parent projected (all attributes) as restriction | + +When a child has multiple restricted ancestors, convergence depends on the mode: `cascade()` uses OR (any path marks a row for deletion), `restrict()` uses AND (all conditions must match). + +When a child references the same parent through multiple foreign keys (e.g., `source_mouse` and `target_mouse` both referencing `Mouse`), these paths always combine with OR regardless of the mode — each FK path is an independent reason for the child row to be affected. + +### Dry Run + +`Table.delete()` and `Table.drop()` accept a `dry_run` parameter that returns affected row counts without modifying data: + +```python +# Preview what would be deleted +(Session & {'subject_id': 'M001'}).delete(dry_run=True) +# {'`lab`.`session`': 3, '`lab`.`trial`': 45, '`lab`.`processed_data`': 45} + +# Preview what would be dropped +Session.drop(dry_run=True) +# {'`lab`.`session`': 100, '`lab`.`trial`': 5000} +``` + +### Unloaded Schema Detection + +If a descendant table lives in a schema that hasn't been activated, the graph-driven delete won't know about it. When the final `DELETE` fails with a foreign key error, DataJoint catches it and produces an actionable error message identifying which schema needs to be activated — rather than the opaque crash of the prior implementation. 
+ +### Iteration API + +Diagrams support Python's iteration protocol, yielding `FreeTable` objects in topological order: + +```python +# Forward iteration (parents first) — useful for export/inspection +for ft in diagram: + print(ft.full_table_name, len(ft)) + +# Reverse iteration (leaves first) — used by delete and drop +for ft in reversed(diagram): + ft.delete_quick() +``` + +Each yielded `FreeTable` carries any cascade or restrict conditions that have been applied. `Table.delete()` and `Table.drop()` use `reversed(diagram)` internally, replacing the manual `topo_sort()` loops from prior implementations. + ### Architecture -`Table.delete()` constructs a `Diagram` internally, calls `cascade()` to compute the affected subgraph, then executes the delete itself in reverse topological order. The Diagram is purely a graph computation and inspection tool — it computes the cascade and provides `preview()`, but all mutation logic (transactions, SQL execution, prompts) lives in `Table.delete()` and `Table.drop()`. +`Table.delete()` constructs a `Diagram` internally, calls `cascade()` to compute the affected subgraph, then iterates `reversed(diagram)` to delete leaves first. The Diagram is purely a graph computation and inspection tool — it computes the cascade and provides `counts()` and iteration, but all mutation logic (transactions, SQL execution, prompts) lives in `Table.delete()` and `Table.drop()`. 
### Advantages over Error-Driven Cascade @@ -278,10 +326,12 @@ The graph-driven approach resolves every known limitation of the prior error-dri | Part integrity enforcement | Post-hoc check after delete | Data-driven post-check (no false positives) | | Unloaded schemas | Crash with opaque error | Clear error: "activate schema X" | | Reusability | Delete-only | Delete, drop, export, prune | -| Inspectability | Opaque recursive cascade | `preview()` / `dry_run` before executing | +| Inspectability | Opaque recursive cascade | `counts()` / `dry_run` before executing | ## See Also +- [What's New in 2.1](whats-new-21.md) — Previous release +- [Release Notes (v2.2.0)](https://github.com/datajoint/datajoint-python/releases) — GitHub changelog - [Use Isolated Instances](../how-to/use-instances.md) — Task-oriented guide - [Working with Instances](../tutorials/advanced/instances.ipynb) — Step-by-step tutorial - [Configuration Reference](../reference/configuration.md) — Thread-safe mode settings diff --git a/src/explanation/index.md b/src/explanation/index.md index 3db5ae63..b43c0922 100644 --- a/src/explanation/index.md +++ b/src/explanation/index.md @@ -53,7 +53,7 @@ and scalable. How DataJoint ensures safe joins through attribute lineage tracking. -- :material-new-box: **[What's New in 2.0](whats-new-2.md)** +- :material-new-box: **[What's New in 2.0](../about/whats-new-2.md)** Major changes, new features, and migration guidance for DataJoint 2.0. diff --git a/src/explanation/relational-workflow-model.md b/src/explanation/relational-workflow-model.md index a48ff736..e3315367 100644 --- a/src/explanation/relational-workflow-model.md +++ b/src/explanation/relational-workflow-model.md @@ -1,69 +1,64 @@ # The Relational Workflow Model -DataJoint implements the **Relational Workflow Model**—a paradigm that extends -relational databases with native support for computational workflows. 
This model -defines a new class of databases called **Computational Databases**, where -computational transformations are first-class citizens of the data model. - -These concepts, along with DataJoint's schema definition language and query algebra, -were first formalized in [Yatsenko et al., 2018](https://doi.org/10.48550/arXiv.1807.11104). - -## The Problem with Traditional Approaches - -Traditional relational databases excel at storing and querying data but struggle -with computational workflows. They can store inputs and outputs, but: - -- The database doesn't understand that outputs were *computed from* inputs -- It doesn't automatically recompute when inputs change -- It doesn't track provenance - -**DataJoint solves these problems by treating your database schema as an -executable workflow specification.** +The relational data model has historically been interpreted through two +conceptual frameworks: Codd's mathematical foundation, which views tables as +logical predicates, and Chen's Entity-Relationship Model, which views tables +as entity types and relationships. The relational workflow model introduces a +third paradigm: **tables represent workflow steps, rows represent workflow +artifacts, and foreign key dependencies prescribe execution order.** This +adds an operational dimension absent from both predecessors—the schema +specifies not only what data exists but how it is derived. + +The relational workflow model and its technical innovations are formally +defined in [Yatsenko & Nguyen, 2026](https://arxiv.org/abs/2602.16585). +DataJoint's schema definition language and query algebra were first +formalized in [Yatsenko et al., 2018](https://doi.org/10.48550/arXiv.1807.11104). 
## Three Paradigms Compared -The relational data model has been interpreted through different conceptual -frameworks, each with distinct strengths and limitations: - | Aspect | Mathematical (Codd) | Entity-Relationship (Chen) | **Relational Workflow (DataJoint)** | |--------|---------------------|----------------------------|-------------------------------------| -| **Core Question** | "What functional dependencies exist?" | "What entity types exist?" | **"When/how are entities created?"** | -| **Time Dimension** | Not addressed | Not central | **Fundamental** | -| **Implementation Gap** | High (abstract to SQL) | High (ERM to SQL) | **None (unified approach)** | -| **Workflow Support** | None | None | **Native workflow modeling** | +| **Core question** | What functional dependencies exist? | What entity types exist? | **When/how are entities created?** | +| **Table semantics** | Logical predicate | Entity or relationship | **Workflow step** | +| **Row semantics** | True proposition | Entity instance | **Workflow artifact** | +| **Foreign keys** | Referential integrity | Relationship | **Execution order** | +| **Computation** | Not addressed | Not addressed | **Declared in schema** | +| **Provenance** | Not addressed | Not addressed | **Structural** | +| **Implementation gap** | High | High | **None** | ### Codd's Mathematical Foundation -Edgar F. Codd's original relational model is rooted in predicate calculus and -set theory. Tables represent logical predicates; rows assert true propositions. -While mathematically rigorous, this approach requires abstract reasoning that -doesn't map to intuitive domain thinking. +Codd's mathematical foundation views tables as logical predicates and rows as +true propositions—rigorous but abstract. ### Chen's Entity-Relationship Model -Peter Chen's Entity-Relationship Model (ERM) shifted focus to concrete domain -modeling—entities and relationships visualized in diagrams. 
However, ERM: +Chen's Entity-Relationship Model shifted focus to domain modeling with +entities, attributes, and relationships—more intuitive, but lacking any +workflow or computational dimension. -- Creates a gap between conceptual design and SQL implementation -- Lacks temporal dimension ("when" entities are created) -- Treats relationships as static connections, not dynamic processes +## Core Concepts -## The Relational Workflow Model +### Workflow Steps and Artifacts -The Relational Workflow Model introduces four fundamental concepts: +Tables are classified into tiers by data entry mode: -### 1. Workflow Entities +| Tier | Role | `make()` | +|------|------|----------| +| **Manual** | Receive direct user entry | No | +| **Lookup** | Hold reference data | No | +| **Imported** | Reach out to data sources outside the DataJoint system (instruments, electronic lab notebooks, external databases) | Yes | +| **Computed** | Derive their contents entirely from upstream DataJoint tables | Yes | -Unlike traditional entities that exist independently, **workflow entities** are -artifacts of workflow execution—they represent the products of specific -operations. This temporal dimension allows us to understand not just *what* -exists, but *when* and *how* it came to exist. +Imported and Computed tables define computations via `make()` methods. The +`make()` method specifies how each entity is derived—this computation logic is +declared within the table definition, making it part of the schema itself +rather than an external workflow specification. -### 2. Workflow Dependencies +### Dependencies as Foreign Keys -**Workflow dependencies** extend foreign keys with operational semantics. They -don't just ensure referential integrity—they prescribe the order of operations. -Parent entities must be created before child entities. +Foreign keys define computational dependencies, not only referential integrity. +The dependency graph is explicit, queryable, and enforced by the database. 
```mermaid graph LR @@ -72,142 +67,102 @@ graph LR C --> D[Analysis] ``` -### 3. Workflow Steps (Table Tiers) - -Each table represents a distinct **workflow step** with a specific role: - -```mermaid -graph TD - subgraph "Lookup (Gray)" - L[Parameters] - end - subgraph "Manual (Green)" - M[Subject] - S[Session] - end - subgraph "Imported (Blue)" - I[Recording] - end - subgraph "Computed (Red)" - C[Analysis] - end - - L --> C - M --> S - S --> I - I --> C -``` - -| Tier | Role | Examples | -|------|------|----------| -| **Lookup** | Reference data, parameters | Species, analysis methods | -| **Manual** | Human-entered observations | Subjects, sessions | -| **Imported** | Automated data acquisition | Recordings, images | -| **Computed** | Derived results | Analyses, statistics | - -### 4. Directed Acyclic Graph (DAG) +### Master-Part Relationships -The schema forms a **DAG** that: +Master-part relationships declare transactional grouping directly in the +schema: the master table represents the workflow step, while part tables hold +the individual items. Insertions and deletions cascade as a unit, enforcing +transactional semantics without application code. -- Prohibits circular dependencies -- Ensures valid execution sequences -- Enables efficient parallel execution -- Supports resumable computation +### Directed Acyclic Graph -## The Workflow Normalization Principle +Dependencies between tables form a directed acyclic graph (DAG); aggregated +dependencies between schemas likewise form a DAG. Unlike task DAGs in +workflow managers, these are *relational schema* DAGs—they define data +structure and relationships, not just execution steps. -> **"Every table represents an entity type that is created at a specific step -> in a workflow, and all attributes describe that entity as it exists at that -> workflow step."** +## Active Schemas -This principle extends entity normalization with temporal and operational -dimensions. 
+The key distinction from classical models: traditional schemas are +*passive*—containers for data produced by external processes. In the +relational workflow model, the schema is *active*—Computed tables declare how +their contents are derived, making the schema itself the workflow +specification. Schemas are defined as Python classes, and entire pipelines are +organized as self-contained code repositories—version-controlled, testable, +and deployable using standard software engineering practices. -## Why This Matters +A useful analogy: electronic spreadsheets unified data and computation—cells +with values alongside cells with formulas. Yet this integration never +penetrated relational databases in their 50+ years of history. The relational +workflow model brings to databases what spreadsheets brought to tabular +calculation: the recognition that data and the computations that produce it +belong together. The analogy has limits: spreadsheets' coupling is also the +source of their well-known fragility. DataJoint addresses this through formal +schema constraints and explicit dependency declaration rather than ad-hoc cell +references. -### Unified Design and Implementation +## Workflow Normalization -Unlike the ERM-SQL gap, DataJoint provides unified: +> **"Every table represents an entity type created at a specific workflow +> step, and all attributes describe that entity as it exists at that step."** -- **Diagramming** — Schema diagrams reflect actual structure -- **Definition** — Table definitions are executable code -- **Querying** — Operators understand workflow semantics +Database normalization decomposes data into tables to eliminate redundancy. +Classical normalization theory achieves this through normal forms based on +functional dependencies. Entity normalization asks whether each attribute +describes the entity identified by the primary key. Workflow normalization +extends these principles with a temporal dimension. 
-No translation needed between conceptual design and implementation. +A Session table contains attributes known when the session is entered (date, +experimenter, subject). Analysis parameters determined later belong in +Computed tables that depend on Session. This discipline prevents tables that +accumulate attributes from different workflow stages, obscuring provenance and +complicating updates. -### Temporal and Operational Awareness +## Entity Integrity -The model captures the dynamic nature of workflows: +All data is represented as well-formed entity sets with primary keys +identifying each entity uniquely. This eliminates redundancy and ensures +consistent updates. -- Data processing sequences -- Computational dependencies -- Operation ordering +When upstream data is deleted, dependent results cascade-delete +automatically—including associated objects in external storage. To correct +errors, you delete, reinsert, and recompute, ensuring every result represents +a consistent computation from valid inputs. -### Immutability and Provenance +## Query Algebra -Workflow artifacts are immutable once created: - -- Preserves execution history -- Maintains data provenance -- Enables reproducible science - -When you delete upstream data, dependent results cascade-delete automatically. -To correct errors, you delete, reinsert, and recompute—ensuring every result -represents a consistent computation from valid inputs. 
- -### Workflow Integrity - -The DAG structure guarantees: - -- No circular dependencies -- Valid operation sequences -- Enforced temporal order -- Computational validity - -## Query Algebra with Workflow Semantics - -DataJoint's five operators provide a complete query algebra: +DataJoint provides a five-operator algebra: | Operator | Symbol | Purpose | |----------|--------|---------| -| **Restriction** | `&` | Filter entities | -| **Join** | `*` | Combine from converging paths | -| **Projection** | `.proj()` | Select/compute attributes | -| **Aggregation** | `.aggr()` | Summarize groups | -| **Union** | `+` | Combine parallel branches | - -These operators: - -- Take entity sets as input, produce entity sets as output -- Preserve entity integrity -- Respect declared dependencies (no ambiguous joins) +| **Restrict** | `&` | Filter entities by attribute values or membership in other relations | +| **Project** | `.proj()` | Select and rename attributes, compute derived values | +| **Join** | `*` | Combine related entities across relations | +| **Aggregate** | `.aggr()` | Group entities and compute summary statistics | +| **Union** | `+` | Combine entity sets with compatible structure | + +The algebra achieves *algebraic closure*: every operator produces a valid +entity set with a well-defined primary key, enabling unlimited composition. +This preservation of entity integrity—every query result is itself a proper +entity set with clear identity—distinguishes DataJoint's algebra from SQL, +where query results lack both a well-defined primary key and a clear entity +type. 
## From Transactions to Transformations -The Relational Workflow Model represents a conceptual shift: - | Traditional View | Workflow View | |------------------|---------------| -| Tables store data | Entity sets are workflow steps | -| Rows are records | Entities are execution instances | -| Foreign keys enforce consistency | Dependencies specify information flow | +| Tables store data | Tables represent workflow steps | +| Rows are records | Rows are workflow artifacts | +| Foreign keys enforce consistency | Foreign keys prescribe execution order | | Updates modify state | Computations create new states | | Schemas organize storage | Schemas specify pipelines | | Queries retrieve data | Queries trace provenance | -This makes DataJoint feel less like a traditional database and more like a -**workflow engine with persistent state**—one that maintains computational -validity while supporting scientific flexibility. - ## Summary -The Relational Workflow Model: - -1. **Extends** relational theory (doesn't replace it) -2. **Adds** temporal and operational semantics -3. **Eliminates** the design-implementation gap -4. **Enables** reproducible computational workflows -5. **Maintains** mathematical rigor - -It's not a departure from relational databases—it's their evolution for -computational workflows. +The relational workflow model offers a new way to understand relational +databases—not merely as storage systems but as computational substrates. By +interpreting tables as workflow steps and foreign keys as execution +dependencies, the schema becomes a complete specification of how data is +derived, not just what data exists. diff --git a/src/explanation/type-system.md b/src/explanation/type-system.md index 39ee4cc9..a6a32b80 100644 --- a/src/explanation/type-system.md +++ b/src/explanation/type-system.md @@ -269,7 +269,7 @@ result = np.mean(ref) # Downloads automatically Schema-addressed storage for files and folders. 
Path mirrors the database structure: `{schema}/{table}/{pk}/{attribute}`. ```python -class ProcessedData(dj.Computed): +class RecordingAnalysis(dj.Computed): definition = """ -> Recording --- diff --git a/src/how-to/alter-tables.md b/src/how-to/alter-tables.md index a336b04f..429279cd 100644 --- a/src/how-to/alter-tables.md +++ b/src/how-to/alter-tables.md @@ -199,10 +199,10 @@ For tables created before enabling job metadata: from datajoint.migrate import add_job_metadata_columns # Dry run -add_job_metadata_columns(ProcessedData, dry_run=True) +add_job_metadata_columns(SessionAnalysis, dry_run=True) # Apply -add_job_metadata_columns(ProcessedData, dry_run=False) +add_job_metadata_columns(SessionAnalysis, dry_run=False) ``` ## Best Practices diff --git a/src/how-to/delete-data.md b/src/how-to/delete-data.md index 1de724cc..e4ac66f1 100644 --- a/src/how-to/delete-data.md +++ b/src/how-to/delete-data.md @@ -175,7 +175,7 @@ with dj.conn().transaction: Session.Trial.insert(corrected_trials) # 3. Recompute derived data -ProcessedData.populate() +SessionAnalysis.populate() ``` This ensures all derived data remains consistent with source data. @@ -212,7 +212,7 @@ diag = dj.Diagram(schema) restricted = diag.cascade(Session & {'subject_id': 'M001'}) # 2. Preview: see affected tables and row counts -counts = restricted.preview() +counts = restricted.counts() # {'`lab`.`session`': 3, '`lab`.`trial`': 45, '`lab`.`processed_data`': 45} # 3. 
Visualize the cascade subgraph (in Jupyter) @@ -226,7 +226,7 @@ restricted - **Preview blast radius**: Understand what a cascade delete will affect before committing - **Multi-schema inspection**: Build a diagram spanning multiple schemas to visualize cascade impact -- **Programmatic control**: Use `preview()` return values to make decisions in automated workflows +- **Programmatic control**: Use `counts()` return values to make decisions in automated workflows For simple single-table deletes, `(Table & restriction).delete()` remains the simplest approach. The diagram API is for when you need more visibility before executing. diff --git a/src/how-to/distributed-computing.md b/src/how-to/distributed-computing.md index 5f523394..5380d33b 100644 --- a/src/how-to/distributed-computing.md +++ b/src/how-to/distributed-computing.md @@ -8,10 +8,10 @@ Use `reserve_jobs=True` to enable job coordination: ```python # Single worker (default) -ProcessedData.populate() +SessionAnalysis.populate() # Distributed mode with job reservation -ProcessedData.populate(reserve_jobs=True) +SessionAnalysis.populate(reserve_jobs=True) ``` ## How It Works @@ -26,7 +26,7 @@ With `reserve_jobs=True`: ```python # Use multiple processes -ProcessedData.populate(reserve_jobs=True, processes=4) +SessionAnalysis.populate(reserve_jobs=True, processes=4) ``` Each process: @@ -42,10 +42,10 @@ Run the same script on multiple machines: ```python # worker_script.py - run on each machine import datajoint as dj -from my_pipeline import ProcessedData +from my_pipeline import SessionAnalysis # Each worker reserves and processes different jobs -ProcessedData.populate( +SessionAnalysis.populate( reserve_jobs=True, display_progress=True, suppress_errors=True @@ -60,13 +60,13 @@ Each auto-populated table has a jobs table (`~~table_name`): ```python # View job status -ProcessedData.jobs +SessionAnalysis.jobs # Filter by status -ProcessedData.jobs.pending -ProcessedData.jobs.reserved -ProcessedData.jobs.errors 
-ProcessedData.jobs.completed +SessionAnalysis.jobs.pending +SessionAnalysis.jobs.reserved +SessionAnalysis.jobs.errors +SessionAnalysis.jobs.completed ``` ## Job Statuses @@ -85,7 +85,7 @@ Sync the job queue with current key_source: ```python # Add new pending jobs, remove stale ones -result = ProcessedData.jobs.refresh() +result = SessionAnalysis.jobs.refresh() print(f"Added: {result['added']}, Removed: {result['removed']}") ``` @@ -95,10 +95,10 @@ Control processing order with priorities: ```python # Refresh with specific priority -ProcessedData.jobs.refresh(priority=1) # Lower = more urgent +SessionAnalysis.jobs.refresh(priority=1) # Lower = more urgent # Process only high-priority jobs -ProcessedData.populate(reserve_jobs=True, priority=3) +SessionAnalysis.populate(reserve_jobs=True, priority=3) ``` ## Error Recovery @@ -107,13 +107,13 @@ Handle failed jobs: ```python # View errors -errors = ProcessedData.jobs.errors +errors = SessionAnalysis.jobs.errors for job in errors.to_dicts(): print(f"Key: {job}, Error: {job['error_message']}") # Clear errors to retry errors.delete() -ProcessedData.populate(reserve_jobs=True) +SessionAnalysis.populate(reserve_jobs=True) ``` ## Orphan Detection @@ -122,7 +122,7 @@ Jobs from crashed workers are automatically recovered: ```python # Refresh with orphan timeout (seconds) -ProcessedData.jobs.refresh(orphan_timeout=3600) +SessionAnalysis.jobs.refresh(orphan_timeout=3600) ``` Reserved jobs older than the timeout are reset to pending. 
@@ -172,10 +172,10 @@ dj.config.jobs.version_method = "git" # worker.py - run on each node from config import * -from my_pipeline import ProcessedData +from my_pipeline import SessionAnalysis while True: - result = ProcessedData.populate( + result = SessionAnalysis.populate( reserve_jobs=True, max_calls=100, suppress_errors=True, diff --git a/src/how-to/handle-errors.md b/src/how-to/handle-errors.md index 51154723..564cfcd3 100644 --- a/src/how-to/handle-errors.md +++ b/src/how-to/handle-errors.md @@ -8,10 +8,10 @@ Continue processing despite individual failures: ```python # Stop on first error (default) -ProcessedData.populate() +SessionAnalysis.populate() # Log errors but continue -ProcessedData.populate(suppress_errors=True) +SessionAnalysis.populate(suppress_errors=True) ``` ## View Failed Jobs @@ -20,10 +20,10 @@ Check the jobs table for errors: ```python # All error jobs -ProcessedData.jobs.errors +SessionAnalysis.jobs.errors # View error details -for job in ProcessedData.jobs.errors.to_dicts(): +for job in SessionAnalysis.jobs.errors.to_dicts(): print(f"Key: {job}") print(f"Message: {job['error_message']}") ``` @@ -33,7 +33,7 @@ for job in ProcessedData.jobs.errors.to_dicts(): Error stack traces are stored in the jobs table: ```python -job = (ProcessedData.jobs.errors & key).fetch1() +job = (SessionAnalysis.jobs.errors & key).fetch1() print(job['error_stack']) ``` @@ -43,10 +43,10 @@ Clear error status and rerun: ```python # Delete error records to retry -ProcessedData.jobs.errors.delete() +SessionAnalysis.jobs.errors.delete() # Reprocess -ProcessedData.populate(reserve_jobs=True) +SessionAnalysis.populate(reserve_jobs=True) ``` ## Retry Specific Jobs @@ -55,10 +55,10 @@ Target specific failed jobs: ```python # Clear one error -(ProcessedData.jobs & key & "status='error'").delete() +(SessionAnalysis.jobs & key & "status='error'").delete() # Retry just that key -ProcessedData.populate(key, reserve_jobs=True) +SessionAnalysis.populate(key, reserve_jobs=True) 
``` ## Ignore Problematic Jobs @@ -67,10 +67,10 @@ Mark jobs to skip permanently: ```python # Mark job as ignored -ProcessedData.jobs.ignore(key) +SessionAnalysis.jobs.ignore(key) # View ignored jobs -ProcessedData.jobs.ignored +SessionAnalysis.jobs.ignored ``` ## Error Handling in make() @@ -79,16 +79,16 @@ Handle expected errors gracefully: ```python @schema -class ProcessedData(dj.Computed): +class SessionAnalysis(dj.Computed): definition = """ - -> RawData + -> Session --- result : float64 """ def make(self, key): try: - data = (RawData & key).fetch1('data') + data = (Session & key).fetch1('data') result = risky_computation(data) except ValueError as e: # Log and skip this key @@ -117,7 +117,7 @@ def make(self, key): Get exception objects for programmatic handling: ```python -result = ProcessedData.populate( +result = SessionAnalysis.populate( suppress_errors=True, return_exception_objects=True ) @@ -133,7 +133,7 @@ for key, exception in result['error_list']: Track errors over time: ```python -progress = ProcessedData.jobs.progress() +progress = SessionAnalysis.jobs.progress() print(f"Pending: {progress.get('pending', 0)}") print(f"Errors: {progress.get('error', 0)}") print(f"Success: {progress.get('success', 0)}") diff --git a/src/how-to/monitor-progress.md b/src/how-to/monitor-progress.md index fe8c243e..cb0065fc 100644 --- a/src/how-to/monitor-progress.md +++ b/src/how-to/monitor-progress.md @@ -7,7 +7,7 @@ Track computation progress and job status. 
Show progress bar during populate: ```python -ProcessedData.populate(display_progress=True) +SessionAnalysis.populate(display_progress=True) ``` ## Check Remaining Work @@ -16,7 +16,7 @@ Count entries left to compute: ```python # What's left to compute -remaining = ProcessedData.key_source - ProcessedData +remaining = SessionAnalysis.key_source - SessionAnalysis print(f"{len(remaining)} entries remaining") ``` @@ -25,7 +25,7 @@ print(f"{len(remaining)} entries remaining") Get counts by status: ```python -progress = ProcessedData.jobs.progress() +progress = SessionAnalysis.jobs.progress() # {'pending': 100, 'reserved': 5, 'error': 3, 'success': 892} for status, count in progress.items(): @@ -38,19 +38,19 @@ Access jobs by their current status: ```python # Pending jobs (waiting to run) -ProcessedData.jobs.pending +SessionAnalysis.jobs.pending # Currently running -ProcessedData.jobs.reserved +SessionAnalysis.jobs.reserved # Failed jobs -ProcessedData.jobs.errors +SessionAnalysis.jobs.errors # Completed jobs (if keep_completed=True) -ProcessedData.jobs.completed +SessionAnalysis.jobs.completed # Skipped jobs -ProcessedData.jobs.ignored +SessionAnalysis.jobs.ignored ``` ## View Job Details @@ -59,10 +59,10 @@ Inspect specific jobs: ```python # All jobs for a key -(ProcessedData.jobs & key).fetch1() +(SessionAnalysis.jobs & key).fetch1() # Recent errors -ProcessedData.jobs.errors.to_dicts( +SessionAnalysis.jobs.errors.to_dicts( order_by='completed_time DESC', limit=10 ) @@ -73,7 +73,7 @@ ProcessedData.jobs.errors.to_dicts( See which workers are processing: ```python -for job in ProcessedData.jobs.reserved.to_dicts(): +for job in SessionAnalysis.jobs.reserved.to_dicts(): print(f"Key: {job}") print(f"Host: {job['host']}") print(f"PID: {job['pid']}") @@ -86,7 +86,7 @@ Track how long jobs take: ```python # Average duration of completed jobs -completed = ProcessedData.jobs.completed.to_arrays('duration') +completed = SessionAnalysis.jobs.completed.to_arrays('duration') 
print(f"Average: {np.mean(completed):.1f}s") print(f"Median: {np.median(completed):.1f}s") ``` @@ -112,10 +112,10 @@ This adds hidden attributes to computed tables: ```python import time -from my_pipeline import ProcessedData +from my_pipeline import SessionAnalysis while True: - remaining, total = ProcessedData.progress() + remaining, total = SessionAnalysis.progress() print(f"\rProgress: {total - remaining}/{total} ({(total - remaining) / total:.0%})", end='') @@ -130,10 +130,10 @@ For distributed mode with job tracking: ```python import time -from my_pipeline import ProcessedData +from my_pipeline import SessionAnalysis while True: - status = ProcessedData.jobs.progress() + status = SessionAnalysis.jobs.progress() print(f"\rPending: {status.get('pending', 0)} | " f"Running: {status.get('reserved', 0)} | " @@ -152,7 +152,7 @@ while True: Check multiple tables: ```python -tables = [RawData, ProcessedData, Analysis] +tables = [Session, SessionAnalysis, TrialStats] for table in tables: total = len(table.key_source) diff --git a/src/how-to/run-computations.md b/src/how-to/run-computations.md index cc6433ac..b925d116 100644 --- a/src/how-to/run-computations.md +++ b/src/how-to/run-computations.md @@ -6,45 +6,45 @@ Execute automated computations with `populate()`. 
```python # Populate all missing entries -ProcessedData.populate() +SessionAnalysis.populate() # With progress display -ProcessedData.populate(display_progress=True) +SessionAnalysis.populate(display_progress=True) ``` ## Restrict What to Compute ```python # Only specific subjects -ProcessedData.populate(Subject & "sex = 'M'") +SessionAnalysis.populate(Subject & "sex = 'M'") # Only recent sessions -ProcessedData.populate(Session & "session_date > '2026-01-01'") +SessionAnalysis.populate(Session & "session_date > '2026-01-01'") # Specific key -ProcessedData.populate({'subject_id': 'M001', 'session_idx': 1}) +SessionAnalysis.populate({'subject_id': 'M001', 'session_idx': 1}) ``` ## Limit Number of Jobs ```python # Process at most 100 entries -ProcessedData.populate(limit=100) +SessionAnalysis.populate(limit=100) ``` ## Error Handling ```python # Continue on errors (log but don't stop) -ProcessedData.populate(suppress_errors=True) +SessionAnalysis.populate(suppress_errors=True) # Check what failed -failed = ProcessedData.jobs & "status = 'error'" +failed = SessionAnalysis.jobs & "status = 'error'" print(failed) # Clear errors to retry failed.delete() -ProcessedData.populate() +SessionAnalysis.populate() ``` ## When to Use Distributed Mode @@ -67,7 +67,7 @@ Choose your populate strategy based on your workload and infrastructure: **Example:** ```python # Simple, direct execution -ProcessedData.populate() +SessionAnalysis.populate() ``` --- @@ -83,13 +83,13 @@ ProcessedData.populate() - Prevents duplicate work between workers - Fault tolerance (crashed jobs can be retried) -- Job status tracking (`ProcessedData.jobs`) +- Job status tracking (`SessionAnalysis.jobs`) - Error isolation (one failure doesn't stop others) **Example:** ```python # Distributed mode with job coordination -ProcessedData.populate(reserve_jobs=True) +SessionAnalysis.populate(reserve_jobs=True) ``` **Job reservation overhead:** ~100ms per job @@ -112,7 +112,7 @@ 
ProcessedData.populate(reserve_jobs=True) **Example:** ```python # Use 4 CPU cores -ProcessedData.populate(reserve_jobs=True, processes=4) +SessionAnalysis.populate(reserve_jobs=True, processes=4) ``` **Caution:** Don't use more processes than CPU cores (causes context switching overhead) @@ -144,10 +144,10 @@ For multi-worker coordination: ```python # Worker 1 (on machine A) -ProcessedData.populate(reserve_jobs=True) +SessionAnalysis.populate(reserve_jobs=True) # Worker 2 (on machine B) -ProcessedData.populate(reserve_jobs=True) +SessionAnalysis.populate(reserve_jobs=True) # Workers coordinate automatically via database # Each reserves different keys, no duplicates @@ -157,30 +157,30 @@ ProcessedData.populate(reserve_jobs=True) ```python # What's left to compute -remaining = ProcessedData.key_source - ProcessedData +remaining = SessionAnalysis.key_source - SessionAnalysis print(f"{len(remaining)} entries remaining") # View job status -ProcessedData.jobs +SessionAnalysis.jobs ``` ## The `make()` Method ```python @schema -class ProcessedData(dj.Computed): +class SessionAnalysis(dj.Computed): definition = """ - -> RawData + -> Session --- result : float64 """ def make(self, key): # 1. Fetch input data - raw = (RawData & key).fetch1('data') + data = (Session & key).fetch1('data') # 2. Compute - result = process(raw) + result = process(data) # 3. 
Insert self.insert1({**key, 'result': result}) diff --git a/src/how-to/use-instances.md b/src/how-to/use-instances.md index 7e7d768d..ce696f1c 100644 --- a/src/how-to/use-instances.md +++ b/src/how-to/use-instances.md @@ -139,6 +139,6 @@ def test_insert(test_instance): ## See Also -- [What's New in 2.2](../explanation/whats-new-22.md/) — Feature overview and rationale +- [What's New in 2.2](../about/whats-new-22.md) — Feature overview and rationale - [Working with Instances](../tutorials/advanced/instances.ipynb/) — Step-by-step tutorial - [Configuration Reference](../reference/configuration.md/) — Thread-safe mode settings diff --git a/src/how-to/use-npy-codec.md b/src/how-to/use-npy-codec.md index eda41a02..4becfc0d 100644 --- a/src/how-to/use-npy-codec.md +++ b/src/how-to/use-npy-codec.md @@ -159,7 +159,7 @@ for rec in large: ```python @schema -class ProcessedData(dj.Computed): +class FilteredTrace(dj.Computed): definition = """ -> RawData --- diff --git a/src/llms.txt b/src/llms.txt index e7d2b241..68935fd5 100644 --- a/src/llms.txt +++ b/src/llms.txt @@ -6,7 +6,7 @@ ## Concepts -- [What's New in 2.0](/explanation/whats-new-2.md): Major changes and new features in DataJoint 2.0 +- [What's New in 2.0](/about/whats-new-2.md): Major changes and new features in DataJoint 2.0 - [Relational Workflow Model](/explanation/relational-workflow-model.md): Core data model concepts - [Entity Integrity](/explanation/entity-integrity.md): How DataJoint ensures data consistency - [Normalization](/explanation/normalization.md): Database normalization principles diff --git a/src/reference/configuration.md b/src/reference/configuration.md index 47d67b40..35805ae4 100644 --- a/src/reference/configuration.md +++ b/src/reference/configuration.md @@ -318,7 +318,7 @@ schema = inst.Schema("my_schema") | `inst.Schema(name)` | Create a Schema bound to this Instance | | `inst.FreeTable(full_name)` | Create a FreeTable bound to this Instance | -See [What's New in 
2.2](../explanation/whats-new-22.md/) for usage examples and rationale. +See [What's New in 2.2](../about/whats-new-22.md) for usage examples and rationale. ## Thread-Safe Mode diff --git a/src/reference/specs/data-manipulation.md b/src/reference/specs/data-manipulation.md index 160aee16..3eb1efe2 100644 --- a/src/reference/specs/data-manipulation.md +++ b/src/reference/specs/data-manipulation.md @@ -357,7 +357,7 @@ About to delete: Subject: 1 rows Session: 5 rows Trial: 150 rows - ProcessedData: 150 rows + SessionAnalysis: 150 rows Commit deletes? [yes, No]: ``` diff --git a/src/reference/specs/diagram.md b/src/reference/specs/diagram.md index b3e9ea4b..2def64f9 100644 --- a/src/reference/specs/diagram.md +++ b/src/reference/specs/diagram.md @@ -120,7 +120,7 @@ dj.Diagram(Subject) + dj.Diagram(analysis).collapse() ## Operational Methods !!! version-added "New in 2.2" - Operational methods (`cascade`, `restrict`, `preview`, `prune`) were added in DataJoint 2.2. + Operational methods (`cascade`, `restrict`, `counts`, `prune`) were added in DataJoint 2.2. Diagrams can propagate restrictions through the dependency graph and inspect affected data using the graph structure. These methods turn Diagram from a visualization tool into a graph computation and inspection component. All mutation operations (delete, drop) are executed by `Table.delete()` and `Table.drop()`, which use Diagram internally. @@ -130,7 +130,7 @@ Diagrams can propagate restrictions through the dependency graph and inspect aff diag.cascade(table_expr, part_integrity="enforce") ``` -Prepare a cascading delete. Starting from a restricted table expression, propagate the restriction downstream through all descendants using **OR** semantics — a descendant row is marked for deletion if *any* ancestor path reaches it. The returned Diagram is **trimmed** to the cascade subgraph: only the seed table and its descendants remain; all ancestors and unrelated tables are removed. 
The trimmed diagram is ready for `preview()` and `delete()`. +Prepare a cascading delete. Starting from a restricted table expression, propagate the restriction downstream through all descendants using **OR** semantics — a descendant row is marked for deletion if *any* ancestor path reaches it. The returned Diagram is **trimmed** to the cascade subgraph: only the seed table and its descendants remain; all ancestors and unrelated tables are removed. The trimmed diagram is ready for `counts()` and `delete()`. | Parameter | Type | Default | Description | |-----------|------|---------|-------------| @@ -189,13 +189,13 @@ restricted = (diag .restrict(Session & 'session_date > "2024-01-01"')) ``` -### `preview()` +### `counts()` ```python -diag.preview() +diag.counts() ``` -Show affected tables and row counts without modifying data. Works with both `cascade()` and `restrict()` restrictions. +Return affected row counts per table without modifying data. Works with both `cascade()` and `restrict()` restrictions. **Returns:** `dict[str, int]` — mapping of full table names to affected row counts. @@ -204,7 +204,7 @@ Show affected tables and row counts without modifying data. 
Works with both `cas ```python diag = dj.Diagram(schema) restricted = diag.cascade(Session & {'subject_id': 'M001'}) -counts = restricted.preview() +counts = restricted.counts() # {'`lab`.`session`': 3, '`lab`.`trial`': 45, '`lab`.`processed_data`': 45} ``` @@ -227,10 +227,23 @@ export = (dj.Diagram(schema) .restrict(Session & 'session_date > "2024-01-01"') .prune()) -export.preview() # only tables with matching rows +export.counts() # only tables with matching rows export # visualize the export subgraph ``` +### Iteration + +Diagrams support iteration in topological order: + +| Method | Order | Use Case | +|--------|-------|----------| +| `for ft in diagram` | Parents first | Data export, inspection | +| `for ft in reversed(diagram)` | Leaves first | Cascade delete, drop | + +Each iteration yields a `FreeTable` with any cascade or restrict conditions applied. Alias nodes are skipped. Only nodes in the diagram's visible set (`nodes_to_show`) are yielded. + +`Table.delete()` and `Table.drop()` use `reversed(diagram)` internally to execute mutations in safe dependency order. + ### Restriction Propagation When `cascade()` or `restrict()` propagates a restriction from a parent table to a child table, one of three rules applies depending on the foreign key relationship: @@ -440,7 +453,7 @@ combined = dj.Diagram.from_sequence([schema1, schema2, schema3]) ## Dependencies -Operational methods (`cascade`, `restrict`, `preview`, `prune`) use `networkx`, which is always installed as a core dependency. +Operational methods (`cascade`, `restrict`, `counts`, `prune`) use `networkx`, which is always installed as a core dependency. 
Diagram **visualization** requires optional dependencies: @@ -456,7 +469,7 @@ If visualization dependencies are missing, `dj.Diagram` displays a warning and p - [How to Read Diagrams](../../how-to/read-diagrams.ipynb) - [Delete Data](../../how-to/delete-data.md) — Cascade inspection and delete workflow -- [What's New in 2.2](../../explanation/whats-new-22.md) — Motivation and design +- [What's New in 2.2](../../about/whats-new-22.md) — Motivation and design - [Data Manipulation](data-manipulation.md) — Insert, update, delete specification - [Query Algebra](query-algebra.md) - [Table Declaration](table-declaration.md) diff --git a/src/reference/specs/job-metadata.md b/src/reference/specs/job-metadata.md index 9d11f4db..e7b1bd97 100644 --- a/src/reference/specs/job-metadata.md +++ b/src/reference/specs/job-metadata.md @@ -318,9 +318,9 @@ def _get_job_version() -> str: dj.config.jobs.add_job_metadata = True @schema -class ProcessedData(dj.Computed): +class SessionAnalysis(dj.Computed): definition = """ - -> RawData + -> Session --- result : float """ @@ -333,11 +333,11 @@ class ProcessedData(dj.Computed): # _job_start_time, _job_duration, _job_version # User-facing API unaffected: -ProcessedData().heading.names # ['raw_data_id', 'result'] -ProcessedData().to_dicts() # Returns only visible attributes +SessionAnalysis().heading.names # ['session_id', 'result'] +SessionAnalysis().to_dicts() # Returns only visible attributes # Access hidden attributes explicitly if needed: -ProcessedData().to_arrays('_job_start_time', '_job_duration', '_job_version') +SessionAnalysis().to_arrays('_job_start_time', '_job_duration', '_job_version') ``` ## Summary of Design Decisions diff --git a/src/reference/specs/table-declaration.md b/src/reference/specs/table-declaration.md index dc400b69..19596e2f 100644 --- a/src/reference/specs/table-declaration.md +++ b/src/reference/specs/table-declaration.md @@ -31,7 +31,7 @@ class TableName(dj.Manual): ### 1.3 Class Naming Rules -- **Format**: 
Strict CamelCase (e.g., `MyTable`, `ProcessedData`) +- **Format**: Strict CamelCase (e.g., `MyTable`, `SessionAnalysis`) - **Pattern**: `^[A-Z][A-Za-z0-9]*$` - **Conversion**: CamelCase to snake_case for SQL table name - **Examples**: diff --git a/src/reference/specs/type-system.md b/src/reference/specs/type-system.md index 03920382..994da1da 100644 --- a/src/reference/specs/type-system.md +++ b/src/reference/specs/type-system.md @@ -526,9 +526,9 @@ class BlobCodec(dj.Codec): Usage: ```python -class ProcessedData(dj.Computed): +class SessionAnalysis(dj.Computed): definition = """ - -> RawData + -> Session --- small_result : <blob> # in-table (in database) large_result : <blob@> # in-store (default store) diff --git a/src/tutorials/advanced/instances.ipynb b/src/tutorials/advanced/instances.ipynb index 8930e951..a33547af 100644 --- a/src/tutorials/advanced/instances.ipynb +++ b/src/tutorials/advanced/instances.ipynb @@ -15,7 +15,7 @@ "- Connect to multiple databases simultaneously\n", "- Understand when to use Instances vs the global pattern\n", "\n", - "> **New in DataJoint 2.2.** For the rationale behind Instances, see [What's New in 2.2](../../explanation/whats-new-22.md)." + "> **New in DataJoint 2.2.** For the rationale behind Instances, see [What's New in 2.2](../../about/whats-new-22.md)."
] }, { @@ -693,7 +693,7 @@ "\n", "### Next Steps\n", "\n", - "- [What's New in 2.2](../../explanation/whats-new-22.md) — Feature overview and rationale\n", + "- [What's New in 2.2](../../about/whats-new-22.md) — Feature overview and rationale\n", "- [Use Isolated Instances](../../how-to/use-instances.md) — Task-oriented guide\n", "- [Configuration Reference](../../reference/configuration.md) — Thread-safe mode settings" ] diff --git a/src/tutorials/basics/03-data-entry.ipynb b/src/tutorials/basics/03-data-entry.ipynb index 9d371b11..afb7f740 100644 --- a/src/tutorials/basics/03-data-entry.ipynb +++ b/src/tutorials/basics/03-data-entry.ipynb @@ -23,10 +23,10 @@ "id": "cell-1", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:33.732025Z", - "iopub.status.busy": "2026-02-19T18:32:33.731867Z", - "iopub.status.idle": "2026-02-19T18:32:34.070946Z", - "shell.execute_reply": "2026-02-19T18:32:34.070614Z" + "iopub.execute_input": "2026-03-13T18:18:16.752427Z", + "iopub.status.busy": "2026-03-13T18:18:16.752306Z", + "iopub.status.idle": "2026-03-13T18:18:17.970943Z", + "shell.execute_reply": "2026-03-13T18:18:17.970262Z" } }, "outputs": [ @@ -34,7 +34,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2026-02-19 18:32:34] DataJoint 2.1.1 connected to postgres@postgres:5432\n" + "[2026-03-13 13:18:17][WARNING]: SSL connection failed (connection to server at \"localhost\" (::1), port 5432 failed: server does not support SSL, but SSL was required\n", + "). Falling back to non-SSL connection. 
To require SSL, set use_tls=True explicitly.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-03-13 13:18:17] DataJoint 2.2.0.dev0 connected to postgres@localhost:5432\n" ] } ], @@ -51,10 +59,10 @@ "id": "cell-2", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.072355Z", - "iopub.status.busy": "2026-02-19T18:32:34.072160Z", - "iopub.status.idle": "2026-02-19T18:32:34.116087Z", - "shell.execute_reply": "2026-02-19T18:32:34.115756Z" + "iopub.execute_input": "2026-03-13T18:18:17.972948Z", + "iopub.status.busy": "2026-03-13T18:18:17.972702Z", + "iopub.status.idle": "2026-03-13T18:18:18.044756Z", + "shell.execute_reply": "2026-03-13T18:18:18.044342Z" } }, "outputs": [], @@ -99,7 +107,7 @@ " \"\"\"\n", "\n", "@schema\n", - "class ProcessedData(dj.Computed):\n", + "class SessionAnalysis(dj.Computed):\n", " definition = \"\"\"\n", " -> Session\n", " ---\n", @@ -131,10 +139,10 @@ "id": "cell-4", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.117170Z", - "iopub.status.busy": "2026-02-19T18:32:34.117085Z", - "iopub.status.idle": "2026-02-19T18:32:34.122852Z", - "shell.execute_reply": "2026-02-19T18:32:34.122568Z" + "iopub.execute_input": "2026-03-13T18:18:18.046347Z", + "iopub.status.busy": "2026-03-13T18:18:18.046233Z", + "iopub.status.idle": "2026-03-13T18:18:18.054371Z", + "shell.execute_reply": "2026-03-13T18:18:18.054079Z" } }, "outputs": [ @@ -255,10 +263,10 @@ "id": "cell-6", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.123834Z", - "iopub.status.busy": "2026-02-19T18:32:34.123740Z", - "iopub.status.idle": "2026-02-19T18:32:34.127854Z", - "shell.execute_reply": "2026-02-19T18:32:34.127624Z" + "iopub.execute_input": "2026-03-13T18:18:18.055630Z", + "iopub.status.busy": "2026-03-13T18:18:18.055536Z", + "iopub.status.idle": "2026-03-13T18:18:18.060215Z", + "shell.execute_reply": "2026-03-13T18:18:18.060002Z" } }, "outputs": [ @@ -402,10 +410,10 @@ "id": 
"cell-8", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.128703Z", - "iopub.status.busy": "2026-02-19T18:32:34.128624Z", - "iopub.status.idle": "2026-02-19T18:32:34.133871Z", - "shell.execute_reply": "2026-02-19T18:32:34.133616Z" + "iopub.execute_input": "2026-03-13T18:18:18.061354Z", + "iopub.status.busy": "2026-03-13T18:18:18.061264Z", + "iopub.status.idle": "2026-03-13T18:18:18.080180Z", + "shell.execute_reply": "2026-03-13T18:18:18.079931Z" } }, "outputs": [ @@ -448,10 +456,10 @@ "id": "cell-10", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.134741Z", - "iopub.status.busy": "2026-02-19T18:32:34.134662Z", - "iopub.status.idle": "2026-02-19T18:32:34.137956Z", - "shell.execute_reply": "2026-02-19T18:32:34.137614Z" + "iopub.execute_input": "2026-03-13T18:18:18.081421Z", + "iopub.status.busy": "2026-03-13T18:18:18.081333Z", + "iopub.status.idle": "2026-03-13T18:18:18.083748Z", + "shell.execute_reply": "2026-03-13T18:18:18.083521Z" } }, "outputs": [ @@ -488,10 +496,10 @@ "id": "cell-12", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.139129Z", - "iopub.status.busy": "2026-02-19T18:32:34.138985Z", - "iopub.status.idle": "2026-02-19T18:32:34.141879Z", - "shell.execute_reply": "2026-02-19T18:32:34.141609Z" + "iopub.execute_input": "2026-03-13T18:18:18.085067Z", + "iopub.status.busy": "2026-03-13T18:18:18.084902Z", + "iopub.status.idle": "2026-03-13T18:18:18.087397Z", + "shell.execute_reply": "2026-03-13T18:18:18.087191Z" } }, "outputs": [ @@ -536,10 +544,10 @@ "id": "cell-16", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.142868Z", - "iopub.status.busy": "2026-02-19T18:32:34.142782Z", - "iopub.status.idle": "2026-02-19T18:32:34.145980Z", - "shell.execute_reply": "2026-02-19T18:32:34.145660Z" + "iopub.execute_input": "2026-03-13T18:18:18.088439Z", + "iopub.status.busy": "2026-03-13T18:18:18.088354Z", + "iopub.status.idle": "2026-03-13T18:18:18.090345Z", + 
"shell.execute_reply": "2026-03-13T18:18:18.090109Z" } }, "outputs": [ @@ -568,10 +576,10 @@ "id": "cell-17", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.146960Z", - "iopub.status.busy": "2026-02-19T18:32:34.146805Z", - "iopub.status.idle": "2026-02-19T18:32:34.151016Z", - "shell.execute_reply": "2026-02-19T18:32:34.150562Z" + "iopub.execute_input": "2026-03-13T18:18:18.091429Z", + "iopub.status.busy": "2026-03-13T18:18:18.091347Z", + "iopub.status.idle": "2026-03-13T18:18:18.094480Z", + "shell.execute_reply": "2026-03-13T18:18:18.094222Z" } }, "outputs": [ @@ -614,10 +622,10 @@ "id": "cell-19", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.152216Z", - "iopub.status.busy": "2026-02-19T18:32:34.152119Z", - "iopub.status.idle": "2026-02-19T18:32:34.161747Z", - "shell.execute_reply": "2026-02-19T18:32:34.161496Z" + "iopub.execute_input": "2026-03-13T18:18:18.095747Z", + "iopub.status.busy": "2026-03-13T18:18:18.095645Z", + "iopub.status.idle": "2026-03-13T18:18:18.109995Z", + "shell.execute_reply": "2026-03-13T18:18:18.109761Z" } }, "outputs": [ @@ -781,10 +789,10 @@ "id": "cell-21", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.162866Z", - "iopub.status.busy": "2026-02-19T18:32:34.162740Z", - "iopub.status.idle": "2026-02-19T18:32:34.168076Z", - "shell.execute_reply": "2026-02-19T18:32:34.167708Z" + "iopub.execute_input": "2026-03-13T18:18:18.111235Z", + "iopub.status.busy": "2026-03-13T18:18:18.111118Z", + "iopub.status.idle": "2026-03-13T18:18:18.114594Z", + "shell.execute_reply": "2026-03-13T18:18:18.114337Z" } }, "outputs": [ @@ -816,10 +824,10 @@ "id": "cell-22", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.169120Z", - "iopub.status.busy": "2026-02-19T18:32:34.169030Z", - "iopub.status.idle": "2026-02-19T18:32:34.173811Z", - "shell.execute_reply": "2026-02-19T18:32:34.173520Z" + "iopub.execute_input": "2026-03-13T18:18:18.115646Z", + 
"iopub.status.busy": "2026-03-13T18:18:18.115544Z", + "iopub.status.idle": "2026-03-13T18:18:18.118887Z", + "shell.execute_reply": "2026-03-13T18:18:18.118644Z" } }, "outputs": [ @@ -867,10 +875,10 @@ "id": "cell-24", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.174862Z", - "iopub.status.busy": "2026-02-19T18:32:34.174780Z", - "iopub.status.idle": "2026-02-19T18:32:34.178071Z", - "shell.execute_reply": "2026-02-19T18:32:34.177687Z" + "iopub.execute_input": "2026-03-13T18:18:18.120003Z", + "iopub.status.busy": "2026-03-13T18:18:18.119895Z", + "iopub.status.idle": "2026-03-13T18:18:18.121592Z", + "shell.execute_reply": "2026-03-13T18:18:18.121382Z" } }, "outputs": [ @@ -898,10 +906,10 @@ "id": "cell-25", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.179224Z", - "iopub.status.busy": "2026-02-19T18:32:34.179133Z", - "iopub.status.idle": "2026-02-19T18:32:34.183116Z", - "shell.execute_reply": "2026-02-19T18:32:34.182771Z" + "iopub.execute_input": "2026-03-13T18:18:18.122739Z", + "iopub.status.busy": "2026-03-13T18:18:18.122625Z", + "iopub.status.idle": "2026-03-13T18:18:18.124507Z", + "shell.execute_reply": "2026-03-13T18:18:18.124281Z" } }, "outputs": [ @@ -939,10 +947,10 @@ "id": "cell-27", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.184282Z", - "iopub.status.busy": "2026-02-19T18:32:34.184186Z", - "iopub.status.idle": "2026-02-19T18:32:34.189022Z", - "shell.execute_reply": "2026-02-19T18:32:34.188607Z" + "iopub.execute_input": "2026-03-13T18:18:18.125576Z", + "iopub.status.busy": "2026-03-13T18:18:18.125470Z", + "iopub.status.idle": "2026-03-13T18:18:18.128644Z", + "shell.execute_reply": "2026-03-13T18:18:18.128450Z" } }, "outputs": [ @@ -986,10 +994,10 @@ "id": "cell-29", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.190142Z", - "iopub.status.busy": "2026-02-19T18:32:34.190034Z", - "iopub.status.idle": "2026-02-19T18:32:34.218899Z", - 
"shell.execute_reply": "2026-02-19T18:32:34.218561Z" + "iopub.execute_input": "2026-03-13T18:18:18.129701Z", + "iopub.status.busy": "2026-03-13T18:18:18.129611Z", + "iopub.status.idle": "2026-03-13T18:18:18.176578Z", + "shell.execute_reply": "2026-03-13T18:18:18.176305Z" } }, "outputs": [ @@ -997,9 +1005,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "Sessions: 1\n", - "Trials: 5\n", - "ProcessedData: 1\n" + "Sessions: 1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Trials: 5\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SessionAnalysis: 1\n" ] } ], @@ -1009,8 +1029,8 @@ "print(f\"Trials: {len(Session.Trial())}\")\n", "\n", "# Populate computed table\n", - "ProcessedData.populate()\n", - "print(f\"ProcessedData: {len(ProcessedData())}\")" + "SessionAnalysis.populate()\n", + "print(f\"SessionAnalysis: {len(SessionAnalysis())}\")" ] }, { @@ -1019,10 +1039,10 @@ "id": "cell-30", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.220120Z", - "iopub.status.busy": "2026-02-19T18:32:34.220016Z", - "iopub.status.idle": "2026-02-19T18:32:34.239831Z", - "shell.execute_reply": "2026-02-19T18:32:34.239434Z" + "iopub.execute_input": "2026-03-13T18:18:18.177815Z", + "iopub.status.busy": "2026-03-13T18:18:18.177726Z", + "iopub.status.idle": "2026-03-13T18:18:18.504789Z", + "shell.execute_reply": "2026-03-13T18:18:18.504462Z" } }, "outputs": [ @@ -1030,21 +1050,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2026-02-19 18:32:34] Deleting 5 rows from \"tutorial_data_entry\".\"session__trial\"\n" + "[2026-03-13 13:18:18] Deleting 1 rows from \"tutorial_data_entry\".\"__session_analysis\"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[2026-02-19 18:32:34] Deleting 1 rows from \"tutorial_data_entry\".\"__processed_data\"\n" + "[2026-03-13 13:18:18] Deleting 5 rows from \"tutorial_data_entry\".\"session__trial\"\n" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ - "[2026-02-19 18:32:34] Deleting 1 rows from \"tutorial_data_entry\".\"session\"\n" + "[2026-03-13 13:18:18] Deleting 1 rows from \"tutorial_data_entry\".\"session\"\n" ] }, { @@ -1054,18 +1074,18 @@ "After delete:\n", "Sessions: 0\n", "Trials: 0\n", - "ProcessedData: 0\n" + "SessionAnalysis: 0\n" ] } ], "source": [ - "# Delete a session - cascades to Trial and ProcessedData\n", + "# Delete a session - cascades to Trial and SessionAnalysis\n", "(Session & {'subject_id': 'M001', 'session_idx': 1}).delete(prompt=False)\n", "\n", "print(f\"After delete:\")\n", "print(f\"Sessions: {len(Session())}\")\n", "print(f\"Trials: {len(Session.Trial())}\")\n", - "print(f\"ProcessedData: {len(ProcessedData())}\")" + "print(f\"SessionAnalysis: {len(SessionAnalysis())}\")" ] }, { @@ -1095,10 +1115,10 @@ "id": "cell-32", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.241418Z", - "iopub.status.busy": "2026-02-19T18:32:34.241304Z", - "iopub.status.idle": "2026-02-19T18:32:34.253003Z", - "shell.execute_reply": "2026-02-19T18:32:34.252662Z" + "iopub.execute_input": "2026-03-13T18:18:18.506322Z", + "iopub.status.busy": "2026-03-13T18:18:18.506184Z", + "iopub.status.idle": "2026-03-13T18:18:18.562055Z", + "shell.execute_reply": "2026-03-13T18:18:18.561791Z" } }, "outputs": [ @@ -1106,14 +1126,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2026-02-19 18:32:34] Deleting 2 rows from \"tutorial_data_entry\".\"session__trial\"\n" + "[2026-03-13 13:18:18] Deleting 0 rows from \"tutorial_data_entry\".\"__session_analysis\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2026-03-13 13:18:18] Deleting 2 rows from \"tutorial_data_entry\".\"session__trial\"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[2026-02-19 18:32:34] Deleting 1 rows from \"tutorial_data_entry\".\"session\"\n" + "[2026-03-13 13:18:18] Deleting 1 rows from \"tutorial_data_entry\".\"session\"\n" ] 
}, { @@ -1163,10 +1190,10 @@ "id": "cell-34", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.254087Z", - "iopub.status.busy": "2026-02-19T18:32:34.253996Z", - "iopub.status.idle": "2026-02-19T18:32:34.263080Z", - "shell.execute_reply": "2026-02-19T18:32:34.262667Z" + "iopub.execute_input": "2026-03-13T18:18:18.563371Z", + "iopub.status.busy": "2026-03-13T18:18:18.563235Z", + "iopub.status.idle": "2026-03-13T18:18:18.574618Z", + "shell.execute_reply": "2026-03-13T18:18:18.574336Z" } }, "outputs": [ @@ -1195,8 +1222,8 @@ " ])\n", "\n", "# Compute results\n", - "ProcessedData.populate()\n", - "print(\"Before correction:\", ProcessedData.fetch1())" + "SessionAnalysis.populate()\n", + "print(\"Before correction:\", SessionAnalysis.fetch1())" ] }, { @@ -1205,10 +1232,10 @@ "id": "cell-35", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.264206Z", - "iopub.status.busy": "2026-02-19T18:32:34.264097Z", - "iopub.status.idle": "2026-02-19T18:32:34.287804Z", - "shell.execute_reply": "2026-02-19T18:32:34.287511Z" + "iopub.execute_input": "2026-03-13T18:18:18.576005Z", + "iopub.status.busy": "2026-03-13T18:18:18.575907Z", + "iopub.status.idle": "2026-03-13T18:18:18.643148Z", + "shell.execute_reply": "2026-03-13T18:18:18.642856Z" } }, "outputs": [ @@ -1216,21 +1243,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "[2026-02-19 18:32:34] Deleting 2 rows from \"tutorial_data_entry\".\"session__trial\"\n" + "[2026-03-13 13:18:18] Deleting 1 rows from \"tutorial_data_entry\".\"__session_analysis\"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[2026-02-19 18:32:34] Deleting 1 rows from \"tutorial_data_entry\".\"__processed_data\"\n" + "[2026-03-13 13:18:18] Deleting 2 rows from \"tutorial_data_entry\".\"session__trial\"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[2026-02-19 18:32:34] Deleting 1 rows from \"tutorial_data_entry\".\"session\"\n" + "[2026-03-13 13:18:18] Deleting 1 
rows from \"tutorial_data_entry\".\"session\"\n" ] }, { @@ -1243,13 +1270,13 @@ ], "source": [ "# Suppose we discovered trial 2 was actually a 'hit' not 'miss'\n", - "# WRONG: Updating the trial would leave ProcessedData stale!\n", + "# WRONG: Updating the trial would leave SessionAnalysis stale!\n", "# Session.Trial.update1({...}) # DON'T DO THIS\n", "\n", "# CORRECT: Delete, reinsert, recompute\n", "key = {'subject_id': 'M003', 'session_idx': 1}\n", "\n", - "# 1. Delete cascades to ProcessedData\n", + "# 1. Delete cascades to SessionAnalysis\n", "(Session & key).delete(prompt=False)\n", "\n", "# 2. Reinsert with corrected data (using transaction)\n", @@ -1261,8 +1288,8 @@ " ])\n", "\n", "# 3. Recompute\n", - "ProcessedData.populate()\n", - "print(\"After correction:\", ProcessedData.fetch1())" + "SessionAnalysis.populate()\n", + "print(\"After correction:\", SessionAnalysis.fetch1())" ] }, { @@ -1281,10 +1308,10 @@ "id": "cell-37", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.288803Z", - "iopub.status.busy": "2026-02-19T18:32:34.288661Z", - "iopub.status.idle": "2026-02-19T18:32:34.291919Z", - "shell.execute_reply": "2026-02-19T18:32:34.291636Z" + "iopub.execute_input": "2026-03-13T18:18:18.644454Z", + "iopub.status.busy": "2026-03-13T18:18:18.644357Z", + "iopub.status.idle": "2026-03-13T18:18:18.647237Z", + "shell.execute_reply": "2026-03-13T18:18:18.647053Z" } }, "outputs": [ @@ -1319,10 +1346,10 @@ "id": "cell-38", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.293014Z", - "iopub.status.busy": "2026-02-19T18:32:34.292923Z", - "iopub.status.idle": "2026-02-19T18:32:34.295847Z", - "shell.execute_reply": "2026-02-19T18:32:34.295565Z" + "iopub.execute_input": "2026-03-13T18:18:18.648317Z", + "iopub.status.busy": "2026-03-13T18:18:18.648217Z", + "iopub.status.idle": "2026-03-13T18:18:18.650397Z", + "shell.execute_reply": "2026-03-13T18:18:18.650176Z" } }, "outputs": [ @@ -1369,10 +1396,10 @@ "id": "cell-40", 
"metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.296898Z", - "iopub.status.busy": "2026-02-19T18:32:34.296803Z", - "iopub.status.idle": "2026-02-19T18:32:34.300943Z", - "shell.execute_reply": "2026-02-19T18:32:34.300635Z" + "iopub.execute_input": "2026-03-13T18:18:18.651456Z", + "iopub.status.busy": "2026-03-13T18:18:18.651348Z", + "iopub.status.idle": "2026-03-13T18:18:18.656223Z", + "shell.execute_reply": "2026-03-13T18:18:18.655906Z" } }, "outputs": [ @@ -1494,10 +1521,10 @@ "id": "cell-43", "metadata": { "execution": { - "iopub.execute_input": "2026-02-19T18:32:34.301868Z", - "iopub.status.busy": "2026-02-19T18:32:34.301770Z", - "iopub.status.idle": "2026-02-19T18:32:34.306011Z", - "shell.execute_reply": "2026-02-19T18:32:34.305552Z" + "iopub.execute_input": "2026-03-13T18:18:18.657445Z", + "iopub.status.busy": "2026-03-13T18:18:18.657355Z", + "iopub.status.idle": "2026-03-13T18:18:18.661007Z", + "shell.execute_reply": "2026-03-13T18:18:18.660761Z" } }, "outputs": [], @@ -1523,7 +1550,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.12" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/src/tutorials/index.md b/src/tutorials/index.md index 32a5d839..c5f71695 100644 --- a/src/tutorials/index.md +++ b/src/tutorials/index.md @@ -130,6 +130,10 @@ Standard pipelines for neurophysiology experiments, actively used in many labs w - [Electrophysiology](domain/electrophysiology/electrophysiology.ipynb) — Import recordings, spike detection, waveforms - [Allen CCF](domain/allen-ccf/allen-ccf.ipynb) — Hierarchical brain atlas ontology +**Complete demo pipeline:** + +- [LC-MS Demo](https://github.com/datajoint/lcms-demo) — Liquid chromatography-mass spectrometry pipeline showcasing DataJoint best practices with PostgreSQL: sample tracking, scan acquisition, mass spectral analysis, and parameterized peak detection + **General patterns:** - [Hotel 
Reservations](examples/hotel-reservations.ipynb) — Booking systems with resource management