Skip to content
2 changes: 0 additions & 2 deletions bin/pg_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,3 @@
call([
'docker', 'exec', 'codabench-django-1', 'python', 'manage.py', 'upload_backup', f'{dump_name}'
])


9 changes: 5 additions & 4 deletions compute_worker/compute_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
# Setup base directories used by all submissions
# note: we need to pass this directory to docker-compose so it knows where to store things!
HOST_DIRECTORY = os.environ.get("HOST_DIRECTORY", "/tmp/codabench/")
BASE_DIR = "/codabench/" # base directory inside the container
BASE_DIR = "/codabench/" # base directory inside the container
CACHE_DIR = os.path.join(BASE_DIR, "cache")
MAX_CACHE_DIR_SIZE_GB = float(os.environ.get('MAX_CACHE_DIR_SIZE_GB', 10))

Expand Down Expand Up @@ -74,6 +74,7 @@
else:
CONTAINER_ENGINE_EXECUTABLE = "docker"


class SubmissionException(Exception):
    """Raised when processing a submission fails inside the compute worker."""
    pass

Expand Down Expand Up @@ -182,7 +183,7 @@ def __init__(self, run_args):
self.bundle_dir = os.path.join(self.root_dir, "bundles")
self.input_dir = os.path.join(self.root_dir, "input")
self.output_dir = os.path.join(self.root_dir, "output")
self.data_dir = os.path.join(HOST_DIRECTORY, "data") # absolute path to data in the host
self.data_dir = os.path.join(HOST_DIRECTORY, "data") # absolute path to data in the host
self.logs = {}

# Details for submission
Expand Down Expand Up @@ -497,10 +498,10 @@ async def _run_program_directory(self, program_dir, kind, can_be_output=False):

logger.info(f"Metadata path is {os.path.join(program_dir, metadata_path)}")
with open(os.path.join(program_dir, metadata_path), 'r') as metadata_file:
try: # try to find a command in the metadata, in other cases set metadata to None
try: # try to find a command in the metadata, in other cases set metadata to None
metadata = yaml.load(metadata_file.read(), Loader=yaml.FullLoader)
logger.info(f"Metadata contains:\n {metadata}")
if isinstance(metadata, dict): # command found
if isinstance(metadata, dict): # command found
command = metadata.get("command")
else:
command = None
Expand Down
2 changes: 2 additions & 0 deletions fabfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@
# $ fab -R role_name <command>
env.roledefs = yaml.load(open('server_config.yaml').read())


# ----------------------------------------------------------------------------
# Helpers
# ----------------------------------------------------------------------------
def _reconnect_current_host():
    """Drop every cached fabric connection, then reopen one to the current host.

    Useful after remote changes (e.g. restarting sshd) invalidate the
    existing connection for ``env.host``.
    """
    network.disconnect_all()
    target = '%s:%s' % (env.host, env.port)
    connections.connect(target)


# ----------------------------------------------------------------------------
# Tasks
# ----------------------------------------------------------------------------
Expand Down
13 changes: 11 additions & 2 deletions src/apps/api/serializers/competitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from api.fields import NamedBase64ImageField
from api.mixins import DefaultUserCreateMixin
from api.serializers.datasets import DataDetailSerializer
from api.serializers.leaderboards import LeaderboardSerializer, ColumnSerializer
from api.serializers.profiles import CollaboratorSerializer
from api.serializers.submissions import SubmissionScoreSerializer
Expand Down Expand Up @@ -41,6 +42,8 @@ class Meta:
'auto_migrate_to_this_phase',
'hide_output',
'leaderboard',
'public_data',
'starting_kit',
'is_final_phase',
)

Expand Down Expand Up @@ -90,6 +93,9 @@ class PhaseDetailSerializer(serializers.ModelSerializer):
tasks = PhaseTaskInstanceSerializer(source='task_instances', many=True)
status = serializers.SerializerMethodField()

public_data = DataDetailSerializer(read_only=True)
starting_kit = DataDetailSerializer(read_only=True)

class Meta:
model = Phase
fields = (
Expand All @@ -100,13 +106,16 @@ class Meta:
'name',
'description',
'status',
'execution_time_limit',
'tasks',
'auto_migrate_to_this_phase',
'has_max_submissions',
'max_submissions_per_day',
'max_submissions_per_person',
'execution_time_limit',
'auto_migrate_to_this_phase',
'hide_output',
# no leaderboard
'public_data',
'starting_kit',
'is_final_phase',
)

Expand Down
5 changes: 5 additions & 0 deletions src/apps/api/serializers/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class Meta:
'was_created_by_competition',
'competition',
'file_name',

)
read_only_fields = (
'key',
Expand Down Expand Up @@ -61,6 +62,7 @@ def create(self, validated_data):


class DataSimpleSerializer(serializers.ModelSerializer):

class Meta:
model = Data
fields = (
Expand All @@ -74,6 +76,7 @@ class Meta:
class DataDetailSerializer(serializers.ModelSerializer):
created_by = serializers.CharField(source='created_by.username')
competition = serializers.SerializerMethodField()
value = serializers.CharField(source='key', required=False)

class Meta:
model = Data
Expand All @@ -86,6 +89,8 @@ class Meta:
'description',
'is_public',
'key',
# Value is used for Semantic Multiselect dropdown api calls
'value',
'was_created_by_competition',
'in_use',
'file_size',
Expand Down
4 changes: 2 additions & 2 deletions src/apps/api/serializers/leaderboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ class LeaderboardPhaseSerializer(serializers.ModelSerializer):
tasks = PhaseTaskInstanceSerializer(source='task_instances', many=True)

def get_columns(self, instance):
columns = Column.objects.filter(leaderboard=instance.leaderboard)
columns = Column.objects.filter(leaderboard=instance.leaderboard, hidden=False)
if len(columns) == 0:
raise serializers.ValidationError("No columns exist on the leaderboard")
else:
Expand Down Expand Up @@ -156,7 +156,7 @@ def get_submissions(self, instance):
.select_related('owner').prefetch_related('scores') \
.annotate(primary_col=Sum('scores__score', filter=Q(scores__column=primary_col)))

for column in instance.leaderboard.columns.exclude(id=primary_col.id).order_by('index'):
for column in instance.leaderboard.columns.exclude(id=primary_col.id, hidden=False).order_by('index'):
col_name = f'col{column.index}'
ordering.append(f'{"-" if column.sorting == "desc" else ""}{col_name}')
kwargs = {
Expand Down
3 changes: 2 additions & 1 deletion src/apps/api/serializers/submissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ class Meta:
'scores',
'display_name',
'slug_url',
'organization'
'organization',
'detailed_result'
)
extra_kwargs = {
"scores": {"read_only": True},
Expand Down
31 changes: 31 additions & 0 deletions src/apps/api/serializers/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
class SolutionSerializer(WritableNestedModelSerializer):
tasks = serializers.SlugRelatedField(queryset=Task.objects.all(), required=False, allow_null=True, slug_field='key', many=True)
data = serializers.SlugRelatedField(queryset=Data.objects.all(), required=False, allow_null=True, slug_field='key')
size = serializers.SerializerMethodField()

class Meta:
model = Solution
Expand All @@ -23,8 +24,16 @@ class Meta:
'tasks',
'data',
'md5',
'size',
]

def get_size(self, instance):
    """Return the file size of the solution's attached data.

    Falls back to ``None`` when the solution has no data attached
    (``instance.data`` is ``None``), which can happen for test fixtures.
    """
    data = instance.data
    try:
        return data.file_size
    except AttributeError:
        # No data attached -> accessing .file_size on None lands here
        print("This solution has no data associated with it...might be a test")
        return None


class SolutionListSerializer(serializers.ModelSerializer):
data = DataDetailSerializer()
Expand All @@ -38,6 +47,7 @@ class Meta:


class TaskSerializer(DefaultUserCreateMixin, WritableNestedModelSerializer):

input_data = serializers.SlugRelatedField(queryset=Data.objects.all(), required=False, allow_null=True, slug_field='key')
ingestion_program = serializers.SlugRelatedField(queryset=Data.objects.all(), required=False, allow_null=True, slug_field='key')
reference_data = serializers.SlugRelatedField(queryset=Data.objects.all(), required=False, allow_null=True, slug_field='key')
Expand Down Expand Up @@ -159,6 +169,8 @@ class PhaseTaskInstanceSerializer(serializers.HyperlinkedModelSerializer):
key = serializers.CharField(source='task.key', required=False)
created_when = serializers.DateTimeField(source='task.created_when', required=False)
name = serializers.CharField(source='task.name', required=False)
solutions = serializers.SerializerMethodField()
public_datasets = serializers.SerializerMethodField()

class Meta:
model = PhaseTaskInstance
Expand All @@ -172,4 +184,23 @@ class Meta:
'key',
'created_when',
'name',
'solutions',
'public_datasets'
)

def get_solutions(self, instance):
    """Serialize every solution attached to this task instance's task."""
    solutions = instance.task.solutions.all()
    return SolutionSerializer(solutions, many=True).data

def get_public_datasets(self, instance):
    """Serialize the datasets attached to this task instance's task.

    Collects the task's input data, reference data, ingestion program and
    scoring program. Datasets that are not set on the task are skipped, so
    the ones that do exist are still returned.

    Bug fixed: the previous implementation built ``[x.id, ...]`` for all
    four at once, so a single missing dataset raised ``AttributeError``
    (``None.id``) and caused the method to discard every dataset and
    return ``None``.

    Returns the serialized dataset list, or ``None`` when the task has no
    datasets at all (preserving the original "nothing attached" contract).
    """
    task = instance.task
    attached = [
        data for data in (
            task.input_data,
            task.reference_data,
            task.ingestion_program,
            task.scoring_program,
        )
        if data is not None
    ]
    if not attached:
        # Matches the original behavior when nothing is attached.
        return None
    qs = Data.objects.filter(id__in=[data.id for data in attached])
    return DataDetailSerializer(qs, many=True).data
18 changes: 9 additions & 9 deletions src/apps/api/views/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,22 +113,22 @@ def get(self, request):
time_unit = request.query_params.get('time_unit')
csv = request.query_params.get('format') == 'csv'

start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d').replace(tzinfo=pytz.UTC)
end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d').replace(hour=11, minute=59, second=59, tzinfo=pytz.UTC)
_start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d').replace(tzinfo=pytz.UTC)
_end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d').replace(hour=11, minute=59, second=59, tzinfo=pytz.UTC)

users = build_request_object(User, 'date_joined', time_unit, start_date, end_date, csv, 'users_data_date', 'users_data_count')
competitions = build_request_object(Competition, 'created_when', time_unit, start_date, end_date, csv, 'competitions_data_date', 'competitions_data_count')
submissions = build_request_object(Submission, 'created_when', time_unit, start_date, end_date, csv, 'submissions_data_date', 'submissions_data_count')
users = build_request_object(User, 'date_joined', time_unit, _start_date, _end_date, csv, 'users_data_date', 'users_data_count')
competitions = build_request_object(Competition, 'created_when', time_unit, _start_date, _end_date, csv, 'competitions_data_date', 'competitions_data_count')
submissions = build_request_object(Submission, 'created_when', time_unit, _start_date, _end_date, csv, 'submissions_data_date', 'submissions_data_count')

if csv:
ob = [{
'start_date': start_date,
'end_date': end_date,
'time_unit': time_unit,
'registered_user_count': User.objects.filter(date_joined__range=[start_date, end_date]).count(),
'competition_count': Competition.objects.filter(created_when__range=[start_date, end_date]).count(),
'competitions_published_count': Competition.objects.filter(published=True, created_when__range=[start_date, end_date]).count(),
'submissions_made_count': Submission.objects.filter(created_when__range=[start_date, end_date]).count(),
'registered_user_count': User.objects.filter(date_joined__range=[_start_date, _end_date]).count(),
'competition_count': Competition.objects.filter(created_when__range=[_start_date, _end_date]).count(),
'competitions_published_count': Competition.objects.filter(published=True, created_when__range=[_start_date, _end_date]).count(),
'submissions_made_count': Submission.objects.filter(created_when__range=[_start_date, _end_date]).count(),
}]

max_len = max(len(users), len(competitions), len(submissions))
Expand Down
33 changes: 33 additions & 0 deletions src/apps/api/views/competitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from competitions.emails import send_participation_requested_emails, send_participation_accepted_emails, \
send_participation_denied_emails, send_direct_participant_email
from competitions.models import Competition, Phase, CompetitionCreationTaskStatus, CompetitionParticipant, Submission
from datasets.models import Data
from competitions.tasks import batch_send_email, manual_migration, create_competition_dump
from competitions.utils import get_popular_competitions, get_featured_competitions
from leaderboards.models import Leaderboard
Expand Down Expand Up @@ -228,7 +229,21 @@ def update(self, request, *args, **kwargs):

phase['leaderboard'] = leaderboard_id

# Get public_data and starting_kit
for phase in data['phases']:
# We just need to know what public_data and starting_kit go with this phase
# We don't need to serialize the whole object
try:
phase['public_data'] = Data.objects.filter(key=phase['public_data']['value'])[0].id
except TypeError:
phase['public_data'] = None
try:
phase['starting_kit'] = Data.objects.filter(key=phase['starting_kit']['value'])[0].id
except TypeError:
phase['starting_kit'] = None

serializer = self.get_serializer(instance, data=data, partial=partial)
type(serializer)
serializer.is_valid(raise_exception=True)
self.perform_update(serializer)

Expand Down Expand Up @@ -541,6 +556,7 @@ def get_leaderboard(self, request, pk):
}
columns = [col for col in query['columns']]
submissions_keys = {}
submission_detailed_results = {}
for submission in query['submissions']:
# count number of entries/number of submissions for the owner of this submission for this phase
# count all submissions with no parent and count all parents without counting the children
Expand All @@ -558,12 +574,24 @@ def get_leaderboard(self, request, pk):
.strftime('%Y-%m-%d')

submission_key = f"{submission['owner']}{submission['parent'] or submission['id']}"

# gather detailed result from submissions for each task
# detailed_results are gathered based on submission key
# `id` is used to fetch the right detailed result in detailed results page
# `detailed_result` url is not needed
submission_detailed_results.setdefault(submission_key, []).append({
# 'detailed_result': submission['detailed_result'],
'task': submission['task'],
'id': submission['id']
})

if submission_key not in submissions_keys:
submissions_keys[submission_key] = len(response['submissions'])
response['submissions'].append({
'id': submission['id'],
'owner': submission['display_name'] or submission['owner'],
'scores': [],
'detailed_results': [],
'fact_sheet_answers': submission['fact_sheet_answers'],
'slug_url': submission['slug_url'],
'organization': submission['organization'],
Expand All @@ -588,6 +616,11 @@ def get_leaderboard(self, request, pk):
tempScore['score'] = str(round(float(tempScore["score"]), precision))
response['submissions'][submissions_keys[submission_key]]['scores'].append(tempScore)

# put detailed results in its submission
for k, v in submissions_keys.items():
response['submissions'][v]['detailed_results'] = submission_detailed_results[k]
print(f"\n{response['submissions']}\n")

for task in query['tasks']:
# This can be used to rendered variable columns on each task
tempTask = {
Expand Down
1 change: 0 additions & 1 deletion src/apps/api/views/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ def get_serializer_class(self):
return serializers.DataSerializer

def create(self, request, *args, **kwargs):

serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
new_dataset = serializer.save() # request_sassy_file_name is temporarily set via this serializer
Expand Down
25 changes: 25 additions & 0 deletions src/apps/competitions/migrations/0033_auto_20230617_1753.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 2.2.17 on 2023-06-17 17:53

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Adds two optional dataset links to Phase: public_data and starting_kit.
    # Auto-generated by Django (see header comment); avoid hand-editing the
    # operations — SET_NULL keeps phases intact if the dataset is deleted.

    dependencies = [
        ('datasets', '0007_auto_20230609_1738'),
        ('competitions', '0032_submission_worker_hostname'),
    ]

    operations = [
        # Optional FK to the dataset participants can access for this phase.
        migrations.AddField(
            model_name='phase',
            name='public_data',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='phase_public_data', to='datasets.Data'),
        ),
        # Optional FK to the starting-kit bundle for this phase.
        migrations.AddField(
            model_name='phase',
            name='starting_kit',
            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='phase_starting_kit', to='datasets.Data'),
        ),
    ]
3 changes: 3 additions & 0 deletions src/apps/competitions/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,9 @@ class Phase(ChaHubSaveMixin, models.Model):
leaderboard = models.ForeignKey('leaderboards.Leaderboard', on_delete=models.DO_NOTHING, null=True, blank=True,
related_name="phases")

public_data = models.ForeignKey('datasets.Data', on_delete=models.SET_NULL, null=True, blank=True, related_name="phase_public_data")
starting_kit = models.ForeignKey('datasets.Data', on_delete=models.SET_NULL, null=True, blank=True, related_name="phase_starting_kit")

class Meta:
ordering = ('index',)

Expand Down
1 change: 0 additions & 1 deletion src/apps/competitions/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,6 @@ def mark_status_as_failed_and_delete_dataset(competition_creation_status, detail
)

unpacker.unpack()

try:
competition = unpacker.save()
except ValidationError as e:
Expand Down
Loading