Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 0 additions & 211 deletions packaging/metadata.py

This file was deleted.

3 changes: 3 additions & 0 deletions packaging/metadata/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from ._types import DynamicField, Metadata

__all__ = ["DynamicField", "Metadata"]
181 changes: 181 additions & 0 deletions packaging/metadata/_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
from __future__ import annotations

import enum
from typing import Optional, Tuple, TypedDict

from ..version import Version
from ._utils import as_list_str, as_str
from ._validation import RegexValidator, Required, eagerly_validate, lazy_validator
from .raw import RawMetadata, parse_email, parse_json

# Type aliases.
_NameAndEmail = Tuple[Optional[str], str]
_LabelAndURL = Tuple[str, str]


@enum.unique
class DynamicField(enum.Enum):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if it is worth sticking with an enum or just with lowercase string literals for the metadata field names? Same goes for known/supported metadata versions.

"""
An :class:`enum.Enum` representing fields which can be listed in the ``Dynamic``
field of `core metadata`_.

Every valid field is a name on this enum, upper-cased with any ``-`` replaced with
``_``. Each value is the field name lower-cased (``-`` are kept). For example, the
``Home-page`` field has a name of ``HOME_PAGE`` and a value of ``home-page``.
"""

# `Name`, `Version`, and `Metadata-Version` are invalid in `Dynamic`.
# 1.0
PLATFORM = "platform"
SUMMARY = "summary"
DESCRIPTION = "description"
KEYWORDS = "keywords"
HOME_PAGE = "home-page"
AUTHOR = "author"
AUTHOR_EMAIL = "author-email"
LICENSE = "license"
# 1.1
SUPPORTED_PLATFORM = "supported-platform"
DOWNLOAD_URL = "download-url"
CLASSIFIER = "classifier"
# 1.2
MAINTAINER = "maintainer"
MAINTAINER_EMAIL = "maintainer-email"
REQUIRES_DIST = "requires-dist"
REQUIRES_PYTHON = "requires-python"
REQUIRES_EXTERNAL = "requires-external"
PROJECT_URL = "project-url"
PROVIDES_DIST = "provides-dist"
OBSOLETES_DIST = "obsoletes-dist"
# 2.1
DESCRIPTION_CONTENT_TYPE = "description-content-type"
PROVIDES_EXTRA = "provides-extra"


@enum.unique
class MetadataVersion(enum.Enum):
    """
    An :class:`enum.Enum` of ``Metadata-Version`` values.

    Each value is the version string itself. The member name is that string
    prefixed with ``v`` and with ``.`` replaced by ``_``; for example, the
    version ``"2.1"`` has a name of ``v2_1``.
    """

    v1_0 = "1.0"
    v1_1 = "1.1"
    v1_2 = "1.2"
    v2_0 = "2.0"
    v2_1 = "2.1"
    v2_2 = "2.2"
    v2_3 = "2.3"


class _ValidatedMetadata(TypedDict, total=False):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So would this class have a key for each piece of metadata that we are willing to perform conversions/validation on from raw metadata?

# Metadata 1.0 - PEP 241
metadata_version: str
name: str
version: Version
platforms: list[str]
summary: str
# description: str
# keywords: List[str]
# home_page: str
# author: str
# author_email: str
# license: str


class Metadata:

# We store our "actual" metadata as a RawMetadata, which
# gives us a little bit of indirection here. The RawMetadata
# class is lenient as to what it will consider valid, but this
# class is not.
#
# However, we want to support validation to happen both up front
# and on the fly as you access attributes, and when using the
# on the fly validation, we don't want to validate anything else
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# on the fly validation, we don't want to validate anything else
# on-the-fly validation, we don't want to validate anything else

# except for the specific piece of metadata that is being
# asked for.
#
# That means that we need to store, at least initially, the
# metadata in a form that is lenient, which is exactly the
# purpose of RawMetadata.
_raw: RawMetadata

# Likewise, we need a place to store our honest to goodness actually
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Likewise, we need a place to store our honest to goodness actually
# Likewise, we need a place to store our honest-to-goodness, actually

# validated metadata too, we could just store this in a dict, but
# this will give us better typing.
Comment on lines +100 to +101
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# validated metadata too, we could just store this in a dict, but
# this will give us better typing.
# validated metadata, too. We could just store this in a dict, but
# this will give us better typing.

_validated: _ValidatedMetadata

def __init__(self) -> None:
raise NotImplementedError

# It's not exactly the most pythonic thing to have a bunch of getter/setters
# like this for every attribute, however this enables us to do our on the
# fly validation.

# Metadata-Version: Metadata 1.0
_metadata_version = lazy_validator(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this private while the other metadata fields below are public?

MetadataVersion, raw_name="metadata_version", validators=[Required()]
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed there's no naming of the metadata field, e.g. "Metadata-Version". What black magic are you doing in what I'm going to assume is a descriptor to infer that (especially since the examples below don't all specify a raw_name)?

)
# Name: Metadata 1.0
name = lazy_validator(
as_str,
validators=[
Required(),
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So for the optional fields would the descriptor return None?

RegexValidator("(?i)^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$"),
],
)
# Version: Metadata 1.0
version = lazy_validator(Version, validators=[Required()])
# Platform: Metadata 1.0
platforms = lazy_validator(as_list_str)
summary = lazy_validator(as_str)

@classmethod
def from_raw(cls, raw: RawMetadata, *, validate: bool = True) -> Metadata:
    """
    Build a :class:`Metadata` on top of already-parsed raw metadata.

    :param raw: The leniently-parsed metadata to wrap.
    :param validate: When true (the default), validate every field up front
        via ``eagerly_validate``. When false, the instance is returned
        without that up-front pass and validation is left to happen on
        demand.
    """
    # This deliberately sidesteps __init__. The intent is for __init__ to be
    # the strict, programmatic constructor (something like
    # Metadata("foo", "1.0", ...)) that insists on required data, whereas
    # from_raw must be able to wrap raw data that has not been checked yet.
    # Allocating with __new__ and filling in the private state by hand lets
    # us do that without exposing a second public construction path.
    instance = cls.__new__(cls)
    instance._validated = _ValidatedMetadata()
    instance._raw = raw

    # A Metadata can never be used without validation; the flag only
    # chooses between validating everything now and validating each field
    # lazily as it is requested.
    if not validate:
        return instance

    eagerly_validate(instance)
    return instance

@classmethod
def from_email(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
    """
    Build a :class:`Metadata` from data handled by ``parse_email``.

    :param data: The metadata document, as bytes or text.
    :param validate: Forwarded to :meth:`from_raw`.
    :raises ValueError: If ``parse_email`` reports any keys it could not
        parse.
    """
    raw, leftover = parse_email(data)

    # Nothing left over: hand the raw metadata to from_raw.
    if not leftover:
        return cls.from_raw(raw, validate=validate)

    # Unparsed data is never allowed to pass silently, whatever ``validate``
    # says. Callers who really want to discard it can call parse_email
    # themselves and feed the result to from_raw.
    extra_keys = ", ".join(leftover.keys())
    raise ValueError(f"Could not parse, extra keys: {extra_keys}")

@classmethod
def from_json(cls, data: bytes | str, *, validate: bool = True) -> Metadata:
    """
    Build a :class:`Metadata` from data handled by ``parse_json``.

    :param data: The metadata document, as bytes or text.
    :param validate: Forwarded to :meth:`from_raw`.
    :raises ValueError: If ``parse_json`` reports any keys it could not
        parse.
    """
    raw, unparsed = parse_json(data)

    # Regardless of the validate attribute, we don't let unparsed data
    # pass silently. If someone wants to drop unparsed data on the floor
    # they can call parse_json themselves and pass it into from_raw.
    if unparsed:
        raise ValueError(
            f"Could not parse, extra keys: {', '.join(unparsed.keys())}"
        )

    return cls.from_raw(raw, validate=validate)
Loading