Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions submit_ce/domain/event/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
Classification, License
from ..exceptions import InvalidEvent


__all__ = [
make_event,
validators,
Expand Down Expand Up @@ -219,6 +220,24 @@ def project(self, submission: Submission) -> Submission:
return submission


class SetProxyInformation(Event):
"""Set proxy information."""
proxied_name: str
proxied_email: str
proxy_name: str

def apply(self, submission: Submission) -> Submission:
# We need to use the Creator dataclass. This holds
# submitter_name and submitter_email (from legacy).
# Proxy name setting indicates that these fields are
# set by submitter and may not correspond to
# user_id (2.0) or submitter_id (1.5).
submission.creator.name = self.proxied_name
submission.creator.email = self.proxied_email
submission.proxy = self.proxy_name
return submission


class ConfirmAuthorship(Event):
"""The submitting user asserts whether they are an author of the paper."""

Expand Down
21 changes: 19 additions & 2 deletions submit_ce/domain/submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from .process import ProcessStatus
from .util import get_tzaware_utc_now


@dataclass
class Author:
"""Represents an author of a submission."""
Expand Down Expand Up @@ -303,7 +302,7 @@ class Submission:

creator: User
owner: User
proxy: Optional[User] = field(default=None)
proxy: Optional[str] = field(default=None)
client: Optional[Client] = field(default=None)
created: Optional[datetime] = field(default=None)
updated: Optional[datetime] = field(default=None)
Expand Down Expand Up @@ -364,6 +363,24 @@ class Submission:
waivers: Dict[str, Waiver] = field(default_factory=dict)
"""Quality control waivers."""

# Derived / presentation
# These should eventually replace creator, since creator
# does not represent creator's submitter name and email when
# proxying a submissions for someone else.
@property
def contact_name(self) -> str:
"""
Who appears as the 'From' / contact name.
"""
return self.creator.name

@property
def contact_email(self) -> str:
"""
Who appears as the 'From' / contact email.
"""
return self.creator.email

@property
def features(self) -> Dict[str, Feature]:
return {k: v for k, v in self.annotations.items()
Expand Down
64 changes: 32 additions & 32 deletions submit_ce/implementations/legacy_implementation/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,10 @@
from .models import DBEvent
from .patch import patch_cross, patch_hold, patch_jref, patch_withdrawal
from submit_ce import domain
from submit_ce.domain import Event, Submission, User, WithdrawalRequest, CrossListClassificationRequest, License
from submit_ce.domain import Event, Submission, User, WithdrawalRequest, CrossListClassificationRequest, License
from submit_ce.domain.event import SetJournalReference, SetDOI, SetReportNumber, CreateSubmission, Rollback
from submit_ce.domain.exceptions import NoSuchSubmission


logger = logging.getLogger(__name__)
logger.propagate = False

Expand All @@ -70,11 +69,14 @@

# retry = _retry
retry: Callable[..., Callable[[F], F]] = _retry


# wraps: Callable[[F], F] = _wraps


def handle_operational_errors(func: F) -> F:
"""Catch SQLAlchemy OperationalErrors and raise :class:`.Unavailable`."""

@wraps(func)
def inner(*args: Any, **kwargs: Any) -> Any:
try:
Expand All @@ -90,6 +92,7 @@ def inner(*args: Any, **kwargs: Any) -> Any:
raise OperationalError('Classic database unavailable',
getattr(e, 'params', None),
getattr(e, 'orig', e)) from e

# return inner
return cast(F, inner)

Expand Down Expand Up @@ -128,7 +131,7 @@ def get_events(session: SQLAlchemySession, submission_id: str) -> List[Event]:
.filter(DBEvent.submission_id == submission_id) \
.order_by(DBEvent.created)
events = [datum.to_event() for datum in event_data]
if not events: # No events, no dice.
if not events: # No events, no dice.
logger.error('No events for submission %s', submission_id)
raise NoSuchSubmission(f'Submission {submission_id} not found')
return events
Expand Down Expand Up @@ -193,9 +196,9 @@ def get_submission(session: SQLAlchemySession, submission_id: str, for_update: b
.filter(models.Submission.submission_id != submission_id) \
.order_by(models.Submission.submission_id.asc())

if for_update: # Lock these rows as well.
if for_update: # Lock these rows as well.
subsequent_query = subsequent_query.with_for_update(read=True)
subsequent_rows = list(subsequent_query) # Execute query.
subsequent_rows = list(subsequent_query) # Execute query.
logger.debug('Got subsequent_rows: %s', subsequent_rows)

try:
Expand Down Expand Up @@ -223,7 +226,6 @@ def get_submission(session: SQLAlchemySession, submission_id: str, for_update: b
return interpolator.get_submission_state()



# @retry(ClassicBaseException, tries=3, delay=1)
@handle_operational_errors
def store_event(session: SQLAlchemySession, event: Event, before: Optional[Submission], after: Submission) -> Tuple[Event, Submission]:
Expand Down Expand Up @@ -265,15 +267,15 @@ def store_event(session: SQLAlchemySession, event: Event, before: Optional[Submi

doc_id: Optional[int] = None
# This is the case that we have a new submission.
if before is None: # and isinstance(after, Submission):
if before is None: # and isinstance(after, Submission):
dbs = models.Submission(type=models.Submission.NEW_SUBMISSION)
dbs.update_from_submission(after)
this_is_a_new_submission = True

else: # Otherwise we're making an update for an existing submission.
else: # Otherwise we're making an update for an existing submission.
this_is_a_new_submission = False

if before.arxiv_id is not None: #:
if before.arxiv_id is not None: #:
# After the original submission is announced, a new Document row is
# created. This Document is shared by all subsequent Submission rows.
doc_id = _load_document_id(session, before.arxiv_id, before.version)
Expand All @@ -283,18 +285,18 @@ def store_event(session: SQLAlchemySession, event: Event, before: Optional[Submi
# database.
if after.version > before.version:
dbs = _create_replacement(doc_id, before.arxiv_id,
after.version, after, event.created)
after.version, after, event.created)
elif isinstance(event, Rollback) and before.version > 1:
dbs = _delete_replacement(session, doc_id, before.arxiv_id,
before.version)
before.version)


# Withdrawals also require a new row, and they use the most recent
# version number.
elif isinstance(event, RequestWithdrawal):
dbs = _create_withdrawal(doc_id, event.reason,
before.arxiv_id, after.version, after,
event.created)
before.arxiv_id, after.version, after,
event.created)
elif isinstance(event, RequestCrossList):
dbs = _create_crosslist(doc_id, event.categories,
before.arxiv_id, after.version, after,
Expand All @@ -304,7 +306,7 @@ def store_event(session: SQLAlchemySession, event: Event, before: Optional[Submi
# also requires a new row. The version number is not incremented.
elif before.is_announced and type(event) in JREFEvents:
dbs = _create_jref(session, doc_id, before.arxiv_id, after.version, after,
event.created)
event.created)

elif isinstance(event, CancelRequest):
dbs = _cancel_request(session, event, before, after)
Expand Down Expand Up @@ -341,7 +343,7 @@ def store_event(session: SQLAlchemySession, event: Event, before: Optional[Submi
session.add(db_event)
event.committed = True

log.handle(session, event, before, after) # Create admin log entry.
log.handle(session, event, before, after) # Create admin log entry.

# Update the domain event and submission states with the submission ID.
# This should carry forward the original submission ID, even if the
Expand Down Expand Up @@ -456,7 +458,7 @@ def _create_withdrawal(document_id: int, reason: str, paper_id: str,
document_id=document_id,
version=version,
remote_addr=submission.client.remote_addr,
remote_host=submission.client.remote_host,
remote_host=submission.client.remote_host,
)
dbs.update_withdrawal(submission, reason, paper_id, version, created)
return dbs
Expand Down Expand Up @@ -578,17 +580,22 @@ def to_submission(row: models.Submission,
primary = row.primary_classification
if row.submitter is None:
submitter = domain.PublicUser(user_id=str(row.submitter_id),
email=row.submitter_email,
name=row.submitter_name)
email=row.submitter_email,
name=row.submitter_name)
else:
submitter = row.get_submitter()
if submission_id is None:
submission_id = str(row.submission_id)
else:
submission_id = str(submission_id)

client = HttpClient(remote_addr = row.remote_addr,
remote_host = row.remote_host)
if row.proxy:
proxy = str(row.proxy)
else:
proxy = None

client = HttpClient(remote_addr=row.remote_addr,
remote_host=row.remote_host)

license: Optional[domain.License] = None
if row.license:
Expand Down Expand Up @@ -646,7 +653,8 @@ def to_submission(row: models.Submission,
primary_classification=primary_clsn,
secondary_classification=secondary_clsn,
arxiv_id=row.doc_paper_id,
version=row.version
version=row.version,
proxy=proxy
)
if row.sticky_status == row.ON_HOLD or row.status == row.ON_HOLD:
submission = patch_hold(submission, row)
Expand Down Expand Up @@ -682,7 +690,7 @@ def load(rows: Iterable[models.Submission]) -> Optional[domain.Submission]:
# Creation time isn't all that precise in the classic database, so
# we'll use submission ID instead.
these_version_rows = sorted([v for v in version_rows],
key=lambda o: o.submission_id)
key=lambda o: o.submission_id)
logger.debug('Version %s: %s', version, version_rows)
# We use the original ID to track the entire lifecycle of the
# submission in NG.
Expand Down Expand Up @@ -752,8 +760,8 @@ def announce_submission(session: SQLAlchemySession, submission_id: str) -> None:
head.status = Submission.ANNOUNCED
if head.document is None:
paper_id = datetime.now().strftime('%s')[-4:] \
+ "." \
+ datetime.now().strftime('%s')[-5:]
+ "." \
+ datetime.now().strftime('%s')[-5:]
head.document = models.Document(paper_id=paper_id)
head.doc_paper_id = paper_id
session.add(head)
Expand All @@ -763,11 +771,3 @@ def announce_submission(session: SQLAlchemySession, submission_id: str) -> None:
def _get_head_idx(session: SQLAlchemySession, rows: List[Submission]) -> int:
"""bdc34: Not sure what this is"""
raise NotImplementedError()








49 changes: 35 additions & 14 deletions submit_ce/implementations/legacy_implementation/interpolate.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
"""
Inject events from outside the scope of the NG submission system.

A core concept of the :mod:`arxiv.submission.domain.event` model is that
the state of a submission can be obtained by playing forward all of the
commands/events applied to it. That works when all agents that operate
on submission state are generating commands. The problem that we face in
the short term is that some operations will be performed by legacy components
that don't generate command/event data.

The objective of the :class:`ClassicEventInterpolator` is to reconcile
NG events/commands with aspects of the classic database that are outside its
current purview. The logic in this module will need to change as the scope
of the NG submission data architecture expands.
"""Given a legacy 1.5 submission, make a submit 2.0 `List[Event]` that would create
a submission in that state.

A core concept of the :mod:`arxiv.submission.domain.event` model is that the
state of a submission can be obtained by playing forward all of the
commands/events applied to it. This is called "event sourcing". Right now,
2026-04, we are not using event sourcing and submit 2.0 just writes to the db.
The events are still valuable because they are a high fidelity history of
changes to the submission.

Event sourcing works when all agents that operate on submission state are
generating commands. The problem faced in the short term is that some
operations will be performed by legacy components that don't generate
command/event data.

The objective of the :class:`ClassicEventInterpolator` is to reconcile NG
events/commands with the classic database that are outside its event system.

The logic in this module will need to change as the scope of the NG submission
data architecture expands.

"""

import logging
Expand All @@ -33,6 +40,7 @@
SetDOI,
SetJournalReference,
SetMSCClassification,
SetProxyInformation,
SetReportNumber,
SetTitle,
)
Expand Down Expand Up @@ -266,6 +274,13 @@ def _inject(self, event_type: Type[Event], **data: Any) -> None:

def _apply(self, event: Event) -> None:
self.submission = event.apply(self.submission)

# Temporary
logger.error(
"AFTER _apply: submission.proxy=%r",
self.submission.proxy
)

self.applied_events.append(event)

def _backport_event(self, event: Event) -> None:
Expand Down Expand Up @@ -322,4 +337,10 @@ def get_submission_state(self) -> Tuple[Submission, List[Event]]:
assert self.submission is not None
logger.debug('done; submission in state %s with %i events',
self.submission.status, len(self.applied_events))
# Temporary Debugging
logger.error(
"BEFORE RETURN: submission.proxy=%r",
self.submission.proxy
)

return self.submission, self.applied_events
3 changes: 3 additions & 0 deletions submit_ce/implementations/legacy_implementation/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,9 @@ def update_from_submission(self, submission: domain.Submission) -> None:
self.remote_addr = str(submission.client.remote_addr)
self.remote_host = submission.client.remote_host or ""

self.proxy = submission.proxy


@property
def primary_classification(self) -> Optional['Category']:
"""Get the primary classification for this submission."""
Expand Down
Loading
Loading