From a461bf187f03b2d6ec310fa5229093801ba0d8ab Mon Sep 17 00:00:00 2001 From: Austin Noto-Moniz Date: Fri, 5 Jan 2024 13:07:13 -0500 Subject: [PATCH 1/2] [PLA-13752] Address doc generation warnings. --- docs/source/data_extraction.rst | 4 +- docs/source/getting_started/datasets.rst | 2 +- docs/source/index.rst | 2 +- docs/source/modules.rst | 4 - docs/source/molecular_generation.rst | 74 +++++++++++++++++++ docs/source/setup.rst | 7 -- docs/source/workflows/getting_started.rst | 2 +- .../predictor_evaluation_workflows.rst | 1 + docs/source/workflows/predictors.rst | 3 +- .../ingredient_ratio_constraint.py | 4 +- .../hierarchical_design_space.py | 4 +- src/citrine/resources/dataset.py | 30 +++----- src/citrine/resources/file_link.py | 29 +++----- src/citrine/resources/ingestion.py | 32 ++------ src/citrine/resources/material_run.py | 6 -- src/citrine/resources/process_run.py | 10 --- src/citrine/resources/process_spec.py | 10 --- src/citrine/resources/project.py | 12 +-- src/citrine/resources/team.py | 15 +--- 19 files changed, 120 insertions(+), 131 deletions(-) delete mode 100644 docs/source/modules.rst create mode 100644 docs/source/molecular_generation.rst delete mode 100644 docs/source/setup.rst diff --git a/docs/source/data_extraction.rst b/docs/source/data_extraction.rst index 00b49f4b5..0fc53e6f1 100644 --- a/docs/source/data_extraction.rst +++ b/docs/source/data_extraction.rst @@ -8,7 +8,7 @@ A GEM Table is defined on a set of material histories, and the rows in the resul Columns correspond to data about the material histories, such as the temperature measured in a kiln used at a specific manufacturing step. Defining rows and columns ------------------------- +------------------------- A Row object describes a mapping from a list of Datasets to rows of a table by selecting a set of Material Histories. Each Material History corresponds to exactly one row, though the Material Histories may overlap such that the same objects contribute data to multiple rows. 
@@ -327,4 +327,4 @@ are compatible with each type of descriptor: - :class:`~citrine.informatics.descriptors.ChemicalFormulaDescriptor`: values of type :class:`~gemd.entity.EmpiricalFormula`, or values of type :class:`~gemd.entity.NominalComposition` when **all** quantity keys are valid atomic symbols - :class:`~citrine.informatics.descriptors.FormulationDescriptor`: all values extracted by ingredient quantity, identifier, and label variables - are used to represent the formulation \ No newline at end of file + are used to represent the formulation diff --git a/docs/source/getting_started/datasets.rst b/docs/source/getting_started/datasets.rst index 9898a752c..bd840d3b0 100644 --- a/docs/source/getting_started/datasets.rst +++ b/docs/source/getting_started/datasets.rst @@ -74,7 +74,7 @@ Assume you have a "band gaps project" with known id, ``band_gaps_project_id``, a Dataset Access, Sharing, and Transfer ------------------------------------- +------------------------------------- When a Dataset is created on the Citrine Platform, only members of the project in which it was created can see it and interact with it. If a Dataset is made public, it (and its entire contents) can be retrieved by any user using any project. diff --git a/docs/source/index.rst b/docs/source/index.rst index dc802834f..491ebd19c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,7 +8,7 @@ Welcome to the Citrine Python client documentation! This site documents the Python SDK for the Citrine Platform. It provides utilities to upload and manage data and design materials using Sequential Learning. -See the :ref:`getting started ` guide for a high-level introduction. +See the :ref:`getting started ` guide for a high-level introduction. The :ref:`workflows ` section documents how to configure and run artificial intelligence (AI) workflows for materials research and development. 
Installation diff --git a/docs/source/modules.rst b/docs/source/modules.rst deleted file mode 100644 index f026acd70..000000000 --- a/docs/source/modules.rst +++ /dev/null @@ -1,4 +0,0 @@ -.. toctree:: - :maxdepth: 4 - - setup \ No newline at end of file diff --git a/docs/source/molecular_generation.rst b/docs/source/molecular_generation.rst new file mode 100644 index 000000000..04fd10156 --- /dev/null +++ b/docs/source/molecular_generation.rst @@ -0,0 +1,74 @@ +.. generative_design_execution: + +[ALPHA] Generative Design Execution +=================================== +The Citrine Platform offers a Generative Design Execution tool that allows the creation of new molecules by applying mutations to a set of given seed molecules. +To use this feature, you need to provide a set of starting molecules and filtering parameters using the :class:`~citrine.informatics.generative_design.GenerativeDesignInput` class. + +The class requires you to define the seed molecules for generating mutations, the fingerprint type used to calculate the `fingerprint similarity `_, the minimum fingerprint similarity between the seed and mutated molecule, the number of initial mutations attempted per seed, and the minimum substructure counts for each mutated molecule. + +Various fingerprint types are available on the Citrine Platform, including Atom Pairs (AP), Path-Length Connectivity (PHCO), Binary Path (BPF), Paths of Atoms of Heteroatoms (PATH), Extended Connectivity Fingerprint with radius 4 (ECFP4) and radius 6 (ECFP6), and Focused Connectivity Fingerprint with radius 4 (FCFP4) and radius 6 (FCFP6). +Each fingerprint type captures different aspects of molecular structure and influences the generated mutations. +You can access these fingerprint types through the :class:`~citrine.informatics.generative_design.FingerprintType` enum, like `FingerprintType.ECFP4`. + +The `structure_exclusions` parameter allows you to control the structural features of mutated molecules. 
+It is a sequence of exclusion types corresponding to the types of structural features or elements to exclude from the list of possible mutation steps during the generative design process. +If a type is present in the sequence, the mutation steps generated by the process will avoid using that feature or element. +The available structure exclusion options can be found in the :class:`~citrine.informatics.generative_design.StructureExclusion` class. + +The `min_substructure_counts` parameter is a dictionary for constraining which substructures (represented by SMILES strings) must appear in each mutated molecule, along with integer values representing the minimum number of times each substructure must appear in a molecule to be considered a valid mutation. + +After the generative design process is complete, the mutations are filtered based on their similarity to the starting seed molecules. +Mutations that do not meet the similarity threshold or are duplicates will be discarded. The remaining mutations are returned as a subset of the original mutations in the form of a list of :class:`~citrine.informatics.generative_design.GenerativeDesignResult` objects. +These results contain information about the seed molecule, the mutation, the similarity score, and the fingerprint type used during execution. + +After triggering the execution and waiting for completion, the user can retrieve the results and utilize them in their work. +The following example demonstrates how to run a generative design execution on the Citrine Platform using the Citrine Python client. + +.. 
code-block:: python + + import os + from citrine import Citrine + from citrine.jobs.waiting import wait_while_executing + from citrine.informatics.generative_design import GenerativeDesignInput, FingerprintType, StructureExclusion + + session = Citrine( + api_key=os.environ.get("API_KEY"), + scheme="https", + host=os.environ.get("CITRINE_HOST"), + port="443", + ) + + team_uid = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + project_uid = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + team = session.teams.get(team_uid) + project = team.projects.get(project_uid) + + # Trigger a new generative design execution + generative_design_input = GenerativeDesignInput( + seeds=["CC(O)=O", "CCCCCCCCCCCC"], + fingerprint_type=FingerprintType.ECFP4, + min_fingerprint_similarity=0.1, + mutation_per_seed=1000, + structure_exclusions=[ + StructureExclusion.BROMINE, + StructureExclusion.CHLORINE, + ], + min_substructure_counts={"c1ccccc1": 1} + ) + generative_design_execution = project.generative_design_executions.trigger( + generative_design_input + ) + execution = wait_while_executing( + collection=project.generative_design_executions, execution=generative_design_execution + ) + generated = execution.results() + mutations = [(gen.seed, gen.mutated) for gen in generated] + + # Or get a completed execution by ID + execution_uid = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + execution = project.generative_design_executions.get(execution_uid) + generated = execution.results() + mutations = [(gen.seed, gen.mutated) for gen in generated] + +To execute the code, replace the `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx` placeholders with valid UIDs from your Citrine environment. Ensure that the API key, scheme, host, and port are correctly specified in the `Citrine` initialization. diff --git a/docs/source/setup.rst b/docs/source/setup.rst deleted file mode 100644 index 552eb49d6..000000000 --- a/docs/source/setup.rst +++ /dev/null @@ -1,7 +0,0 @@ -setup module -============ - -.. 
automodule:: setup - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/workflows/getting_started.rst b/docs/source/workflows/getting_started.rst index 0cf4dcba7..3294151e8 100644 --- a/docs/source/workflows/getting_started.rst +++ b/docs/source/workflows/getting_started.rst @@ -1,4 +1,4 @@ -.. _getting-started: +.. _ai-engine-getting-started: Getting Started =============== diff --git a/docs/source/workflows/predictor_evaluation_workflows.rst b/docs/source/workflows/predictor_evaluation_workflows.rst index 145a8744d..bf88a3859 100644 --- a/docs/source/workflows/predictor_evaluation_workflows.rst +++ b/docs/source/workflows/predictor_evaluation_workflows.rst @@ -15,6 +15,7 @@ Metrics are specified as a set of :class:`PredictorEvaluationMetrics `. .. _Expression Predictor: + Expression predictor -------------------- diff --git a/src/citrine/informatics/constraints/ingredient_ratio_constraint.py b/src/citrine/informatics/constraints/ingredient_ratio_constraint.py index 0149a0bfd..f85364ce1 100644 --- a/src/citrine/informatics/constraints/ingredient_ratio_constraint.py +++ b/src/citrine/informatics/constraints/ingredient_ratio_constraint.py @@ -13,8 +13,10 @@ class IngredientRatioConstraint(Serializable['IngredientRatioConstraint'], Const """A formulation constraint operating on the ratio of quantities of ingredients and a basis. Example: "6 to 7 parts ingredient A per 100 parts ingredient B" becomes + .. 
code:: python - IngredientRatioConstraint(min=6, max=7, ingredient=("A", 100), basis_ingredients=["B"]) + + IngredientRatioConstraint(min=6, max=7, ingredient=("A", 100), basis_ingredients=["B"]) Parameters ---------- diff --git a/src/citrine/informatics/design_spaces/hierarchical_design_space.py b/src/citrine/informatics/design_spaces/hierarchical_design_space.py index 457a35227..3a7d7b6e5 100644 --- a/src/citrine/informatics/design_spaces/hierarchical_design_space.py +++ b/src/citrine/informatics/design_spaces/hierarchical_design_space.py @@ -124,11 +124,11 @@ class HierarchicalDesignSpace(EngineResource["HierarchicalDesignSpace"], DesignS referencing other sub-nodes, allowing for the linkage of complex material history shapes in the resulting candidates. - Every node also contains a set of :class:`~citrine.informatics.dimensions.Dimension`s + Every node also contains a set of :class:`~citrine.informatics.dimensions.Dimension`\\s used to define any attributes (i.e., properties, processing parameters) that will appear on the materials produced by that node. - :class:`~citrine.informatics.data_sources.DataSource`s can be included on the configuration + :class:`~citrine.informatics.data_sources.DataSource`\\s can be included on the configuration to allow for design over "known" materials. The Citrine Platform will look up the ingredient names from formulation subspaces on the design space nodes in order to inject their composition/properties into the material history of the candidates. diff --git a/src/citrine/resources/dataset.py b/src/citrine/resources/dataset.py index 1583192db..b89a65ea6 100644 --- a/src/citrine/resources/dataset.py +++ b/src/citrine/resources/dataset.py @@ -53,45 +53,33 @@ class Dataset(Resource['Dataset']): unique_name: Optional[str] An optional, globally unique name that can be used to retrieve the dataset. - Attributes - ---------- - uid: UUID - Unique uuid4 identifier of this dataset. 
- deleted: bool - Flag indicating whether or not this dataset has been deleted. - created_by: UUID - ID of the user who created the dataset. - updated_by: UUID - ID of the user who last updated the dataset. - deleted_by: UUID - ID of the user who deleted the dataset, if it is deleted. - create_time: int - Time the dataset was created, in seconds since epoch. - update_time: int - Time the dataset was most recently updated, in seconds since epoch. - delete_time: int - Time the dataset was deleted, in seconds since epoch, if it is deleted. - public: bool - Flag indicating whether the dataset is publicly readable. - """ _response_key = 'dataset' _resource_type = ResourceTypeEnum.DATASET uid = properties.Optional(properties.UUID(), 'id') + """UUID: Unique uuid4 identifier of this dataset.""" name = properties.String('name') unique_name = properties.Optional(properties.String(), 'unique_name') summary = properties.String('summary') description = properties.String('description') deleted = properties.Optional(properties.Boolean(), 'deleted') + """bool: Flag indicating whether or not this dataset has been deleted.""" created_by = properties.Optional(properties.UUID(), 'created_by') + """UUID: ID of the user who created the dataset.""" updated_by = properties.Optional(properties.UUID(), 'updated_by') + """UUID: ID of the user who last updated the dataset.""" deleted_by = properties.Optional(properties.UUID(), 'deleted_by') + """UUID: ID of the user who deleted the dataset, if it is deleted.""" create_time = properties.Optional(properties.Datetime(), 'create_time') + """int: Time the dataset was created, in seconds since epoch.""" update_time = properties.Optional(properties.Datetime(), 'update_time') + """int: Time the dataset was most recently updated, in seconds since epoch.""" delete_time = properties.Optional(properties.Datetime(), 'delete_time') + """int: Time the dataset was deleted, in seconds since epoch, if it is deleted.""" public = 
properties.Optional(properties.Boolean(), 'public') + """bool: Flag indicating whether the dataset is publicly readable.""" project_id = properties.Optional(properties.UUID(), 'project_id', serializable=False, deserializable=False) session = properties.Optional(properties.Object(Session), 'session', diff --git a/src/citrine/resources/file_link.py b/src/citrine/resources/file_link.py index 7185bd489..2ea840660 100644 --- a/src/citrine/resources/file_link.py +++ b/src/citrine/resources/file_link.py @@ -132,26 +132,6 @@ class FileLink( url: str URL that can be used to access the file. - Attributes - ---------- - uid: UUID - Unique uuid4 identifier of this file; consistent across versions. - version: UUID - Unique uuid4 identifier of this version of this file - version_number: Integer - How many times this file has been uploaded; - files are the "same" if the share a filename and dataset - created_time: Datetime - Time the file was created on platform. - created_by: UUID - Unique uuid4 identifier of this User who loaded this file - mime_type: String - Encoded string representing the type of the file (IETF RFC 2045) - size: Integer - Size in bytes of the file - description: String - A human-readable description of the file - """ # NOTE: skipping the "metadata" field since it appears to be unused @@ -160,13 +140,22 @@ class FileLink( filename = properties.String('filename') url = properties.String('url') uid = properties.Optional(properties.UUID, 'id', serializable=False) + """UUID: Unique uuid4 identifier of this file; consistent across versions.""" version = properties.Optional(properties.UUID, 'version', serializable=False) + """UUID: Unique uuid4 identifier of this version of this file.""" created_time = properties.Optional(properties.Datetime, 'created_time', serializable=False) + """datetime: Time the file was created on platform.""" created_by = properties.Optional(properties.UUID, 'created_by', serializable=False) + """UUID: Unique uuid4 identifier of this User 
who loaded this file.""" mime_type = properties.Optional(properties.String, 'mime_type', serializable=False) + """str: Encoded string representing the type of the file (IETF RFC 2045).""" size = properties.Optional(properties.Integer, 'size', serializable=False) + """int: Size in bytes of the file.""" description = properties.Optional(properties.String, 'description', serializable=False) + """str: A human-readable description of the file.""" version_number = properties.Optional(properties.Integer, 'version_number', serializable=False) + """int: How many times this file has been uploaded; files are the "same" if they share a + filename and dataset.""" def __init__(self, filename: str, url: str): GEMDFileLink.__init__(self, filename, url) diff --git a/src/citrine/resources/ingestion.py b/src/citrine/resources/ingestion.py index af8693e9e..a0b3db5fd 100644 --- a/src/citrine/resources/ingestion.py +++ b/src/citrine/resources/ingestion.py @@ -107,19 +107,13 @@ def __repr__(self): class IngestionException(CitrineException): - """ - [ALPHA] An exception that contains details of a failed ingestion. - - Attributes - ---------- - uid: Optional[UUID] - errors: List[IngestionErrorTrace] - - """ + """[ALPHA] An exception that contains details of a failed ingestion.""" uid = properties.Optional(properties.UUID(), 'ingestion_id', default=None) + """Optional[UUID]""" status = properties.Enumeration(IngestionStatusType, "status") errors = properties.List(properties.Object(IngestionErrorTrace), "errors") + """List[IngestionErrorTrace]""" def __init__(self, *, @@ -147,20 +141,14 @@ def from_api_error(cls, source: ApiError) -> "IngestionException": class IngestionStatus(Resource['IngestionStatus']): - """ - [ALPHA] An object that represents the outcome of an ingestion event. 
- - Attributes - ---------- - uid: String - status: IngestionStatusType - errors: List[IngestionErrorTrace] - - """ + """[ALPHA] An object that represents the outcome of an ingestion event.""" uid = properties.Optional(properties.UUID(), 'ingestion_id', default=None) + """UUID""" status = properties.Enumeration(IngestionStatusType, "status") + """IngestionStatusType""" errors = properties.List(properties.Object(IngestionErrorTrace), "errors") + """List[IngestionErrorTrace]""" def __init__(self, *, @@ -190,14 +178,10 @@ class Ingestion(Resource['Ingestion']): every object in that dataset. A user with write access to a dataset can create, update, and delete objects in the dataset. - Attributes - ---------- - uid: UUID - Unique uuid4 identifier of this ingestion. - """ uid = properties.UUID('ingestion_id') + """UUID: Unique uuid4 identifier of this ingestion.""" project_id = properties.UUID('project_id') dataset_id = properties.UUID('dataset_id') session = properties.Object(Session, 'session', serializable=False) diff --git a/src/citrine/resources/material_run.py b/src/citrine/resources/material_run.py index 0c1157f8c..6420541b8 100644 --- a/src/citrine/resources/material_run.py +++ b/src/citrine/resources/material_run.py @@ -50,12 +50,6 @@ class MaterialRun( file_links: List[FileLink], optional Links to associated files, with resource paths into the files API. - Attributes - ---------- - measurements: List[MeasurementRun], optional - Measurements performed on this material. The link is established by creating the - measurement run and settings its `material` field to this material run. 
- """ _response_key = GEMDMaterialRun.typ # 'material_run' diff --git a/src/citrine/resources/process_run.py b/src/citrine/resources/process_run.py index 8468487e8..0920eb08f 100644 --- a/src/citrine/resources/process_run.py +++ b/src/citrine/resources/process_run.py @@ -47,16 +47,6 @@ class ProcessRun(GEMDResource['ProcessRun'], ObjectRun, GEMDProcessRun, typ=GEMD source: PerformedSource, optional Information about the person who performed the run and when. - Attributes - ---------- - output_material: MaterialRun - The material run that this process run produces. The link is established by creating - the material run and settings its `process` field to this process run. - - ingredients: List[IngredientRun] - Ingredient runs that act as inputs to this process run. The link is established by - creating each ingredient run and setting its `process` field to this process run. - """ _response_key = GEMDProcessRun.typ # 'process_run' diff --git a/src/citrine/resources/process_spec.py b/src/citrine/resources/process_spec.py index 54439a41a..55f45c97f 100644 --- a/src/citrine/resources/process_spec.py +++ b/src/citrine/resources/process_spec.py @@ -49,16 +49,6 @@ class ProcessSpec( file_links: List[FileLink], optional Links to associated files, with resource paths into the files API. - Attributes - ---------- - output_material: MaterialSpec - The material spec that this process spec produces. The link is established by creating - the material spec and settings its `process` field to this process spec. - - ingredients: List[IngredientSpec], optional - Ingredient specs that act as inputs to this process spec. The link is established by - creating each ingredient spec and setting its `process` field to this process spec. 
- """ _response_key = GEMDProcessSpec.typ # 'process_spec' diff --git a/src/citrine/resources/project.py b/src/citrine/resources/project.py index f08cab49d..1fb8f90b2 100644 --- a/src/citrine/resources/project.py +++ b/src/citrine/resources/project.py @@ -63,15 +63,6 @@ class Project(Resource['Project']): session: Session, optional The Citrine session used to connect to the database. - Attributes - ---------- - uid: UUID - Unique uuid4 identifier of this project. - status: str - Status of the project. - created_at: int - Time the project was created, in seconds since epoch. - """ _response_key = 'project' @@ -80,8 +71,11 @@ class Project(Resource['Project']): name = properties.String('name') description = properties.Optional(properties.String(), 'description') uid = properties.Optional(properties.UUID(), 'id') + """UUID: Unique uuid4 identifier of this project.""" status = properties.Optional(properties.String(), 'status') + """str: Status of the project.""" created_at = properties.Optional(properties.Datetime(), 'created_at') + """int: Time the project was created, in seconds since epoch.""" team_id = properties.Optional(properties.UUID, "team.id", serializable=False) def __init__(self, diff --git a/src/citrine/resources/team.py b/src/citrine/resources/team.py index 71f42ed0a..0731ce112 100644 --- a/src/citrine/resources/team.py +++ b/src/citrine/resources/team.py @@ -121,17 +121,6 @@ class Team(Resource['Team']): session: Session, optional The Citrine session used to connect to the database. - Attributes - ---------- - uid: UUID - Unique uuid4 identifier of this team. - created_at: int - Time the team was created, in seconds since epoch. 
- name: str - Name of the Team - description: str - Description of the Team - """ _response_key = 'team' @@ -139,9 +128,13 @@ class Team(Resource['Team']): _api_version = "v3" name = properties.String('name') + """str: Name of the Team""" description = properties.Optional(properties.String(), 'description') + """str: Description of the Team""" uid = properties.Optional(properties.UUID(), 'id') + """UUID: Unique uuid4 identifier of this team.""" created_at = properties.Optional(properties.Datetime(), 'created_at') + """int: Time the team was created, in seconds since epoch.""" def __init__(self, name: str, From 7202b4aa0dd9fad0010cf0caa8d7fa6c95679d33 Mon Sep 17 00:00:00 2001 From: Austin Noto-Moniz Date: Fri, 5 Jan 2024 15:57:38 -0500 Subject: [PATCH 2/2] [PLA-13752] Add link for API Reference. Since we're generating our API docs, one of our tables of contents must link to it, or else it will throw a warning. So I've added it to the main page. That main page TOC goes multiple levels deep, so it will show the generated page's name. Since we were starting it at src/, that was its name, which wouldn't make much sense to users. So I tweaked it to start at src/citrine/. --- docs/source/conf.py | 4 ++-- docs/source/index.rst | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 1e00c88af..5b9aa88be 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,7 +13,7 @@ import citrine import os import sys -sys.path.insert(0, os.path.abspath('../../src')) +sys.path.insert(0, os.path.abspath('../../src/citrine')) # -- Project information ----------------------------------------------------- @@ -44,7 +44,7 @@ # build. 
# # See: https://github.com/sphinx-contrib/apidoc -apidoc_module_dir = '../../src' +apidoc_module_dir = '../../src/citrine' apidoc_output_dir = 'reference' apidoc_excluded_paths = ['tests'] apidoc_separate_modules = True diff --git a/docs/source/index.rst b/docs/source/index.rst index 491ebd19c..c57f8ed45 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -41,6 +41,7 @@ Table of Contents formulations_example molecular_generation FAQ/index + API Reference Indices and tables ==================