Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions airflow/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,7 @@ def update_import_errors(session, dagbag):
session.commit()

@provide_session
def create_dag_run(self, dag, session=None):
def create_dag_run(self, dag, session=None, dry_run=False):
"""
This method checks whether a new DagRun needs to be created
for a DAG based on scheduling interval.
Expand Down Expand Up @@ -867,7 +867,6 @@ def create_dag_run(self, dag, session=None):
"Dag start date: %s. Next run date: %s",
dag.start_date, next_run_date
)

# don't ever schedule in the future
if next_run_date > timezone.utcnow():
return
Expand All @@ -892,6 +891,11 @@ def create_dag_run(self, dag, session=None):
if next_run_date and min_task_end_date and next_run_date > min_task_end_date:
return

# Don't really schedule the job, we are interested in its next run date
# as calculated by the scheduler
if dry_run is True:
return next_run_date
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ultrabug This was my complaint - I don't think we should be calling this method to work out when the next exec date is -- cos this can return None when the dag concurrecy is at it's limit for instance (line 789) and that is the problem. I think it's going to cause confusion that the "scheduled in" column would come-and-go depending on what other runs might be happening.

Copy link
Contributor Author

@ultrabug ultrabug Dec 11, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok @ashb I guess we can work on something to make it better indeed. Your point is not depending on the concurrency limit if I'm correct and display the theorically due schedule in time. Am I right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ping @ashb and @ultrabug . Can I help you guys to get this over the line?


if next_run_date and period_end and period_end <= timezone.utcnow():
next_run = dag.create_dagrun(
run_id=DagRun.ID_PREFIX + next_run_date.isoformat(),
Expand Down
26 changes: 26 additions & 0 deletions airflow/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3732,6 +3732,32 @@ def latest_execution_date(self, session=None):
).scalar()
return execution_date

@property
@provide_session
def schedulable_at(self, session=None):
"""
Returns the earliest time at which the DAG will next be eligible for
execution by the scheduler
"""
from airflow.jobs import SchedulerJob
scheduler = SchedulerJob()
next_run_date = scheduler.create_dag_run(self, dry_run=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am still getting confused here. You might have already talked about this to @ashb but it would be good if you can clear my doubt.

My confusion is the same that Ash mentioned in one of the comments, what is the difference between schedulable_at and next_execution_date?

From the code will it not give the schedulable_at=next_execution_date + schedule_interval ?

if next_run_date:
period_end = self.following_schedule(next_run_date)
return period_end

@property
def scheduled_in(self):
"""
Returns a human readable duration before the next schedulable time
of the DAG
"""
import pendulum
diff = self.schedulable_at - pendulum.now()
if diff.in_seconds() <= 0:
return "overdue"
return diff

@property
def subdags(self):
"""
Expand Down
5 changes: 5 additions & 0 deletions airflow/www/templates/airflow/dag.html
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ <h3 class="pull-left">
<span style='color:#AAA;'>ROOT: </span> <span> {{ root }}</span>
{% endif %}
</h3>
<h4 class="pull-right">
<a class="label label-primary" href="/admin/dagrun/?flt2_dag_id_equals={{ dag.dag_id }}">
scheduled in: {{ dag.scheduled_in }}
</a>
</h4>
<h4 class="pull-right">
<a class="label label-default" href="/admin/dagrun/?flt2_dag_id_equals={{ dag.dag_id }}">
schedule: {{ dag.schedule_interval }}
Expand Down
14 changes: 13 additions & 1 deletion airflow/www/templates/airflow/dags.html
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ <h2>DAGs</h2>
<span id="statuses_info" class="glyphicon glyphicon-info-sign" aria-hidden="true" title="Status of all previous DAG runs."></span>
<img id="loading" width="15" src="{{ url_for("static", filename="loading.gif") }}">
</th>
<th>Scheduled in
<span id="statuses_info" class="glyphicon glyphicon-info-sign" aria-hidden="true" title="Earliest time at which the DAG will next be scheduled."></span>
</th>
<th class="text-center">Links</th>
</tr>
</thead>
Expand Down Expand Up @@ -134,7 +137,16 @@ <h2>DAGs</h2>
<svg height="10" width="10" id='dag-run-{{ dag.safe_dag_id }}' style="display: block;"></svg>
</td>

<!-- Column 9: Links -->
<!-- Column 9: Dag Scheduled in -->
<td>
{% if dag_id in webserver_dags %}
<a class="label label-default" href="/admin/dagrun/?flt2_dag_id_equals={{ dag.dag_id }}" title="Schedulable at: {{ dag.schedulable_at }}">
{{ dag.scheduled_in }}
</a>
{% endif %}
</td>

<!-- Column 10: Links -->
<td class="text-center" style="display:flex; flex-direction:row; justify-content:space-around;">
{% if dag %}

Expand Down
25 changes: 25 additions & 0 deletions tests/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,31 @@ def jinja_udf(name):
result = task.render_template('', "{{ 'world' | hello}}", dict())
self.assertEqual(result, 'Hello world')

def test_schedulable_at(self):
"""
[AIRFLOW-1424] test the 'schedulable_after' DAG property.
"""
test_dag_id = 'test_get_num_task_instances_dag'
test_task_id = 'task_1'

test_dag = DAG(dag_id=test_dag_id, start_date=DEFAULT_DATE,
schedule_interval=datetime.timedelta(days=1))
DummyOperator(task_id=test_task_id, dag=test_dag)
self.assertEqual(test_dag.schedulable_at,
DEFAULT_DATE + datetime.timedelta(days=1))

def test_scheduled_in_overdue(self):
"""
[AIRFLOW-1424] test the 'scheduled_in' DAG property.
"""
test_dag_id = 'test_get_num_task_instances_dag'
test_task_id = 'task_1'

test_dag = DAG(dag_id=test_dag_id, start_date=DEFAULT_DATE,
schedule_interval=datetime.timedelta(days=1))
DummyOperator(task_id=test_task_id, dag=test_dag)
self.assertEqual(test_dag.scheduled_in, "overdue")

def test_cycle(self):
# test empty
dag = DAG(
Expand Down