Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import { StudyDownloadService } from './services/study-download-request.service'
import { TagModule } from '../tag/tag.module';
import { StudyDownloadRequestPipe } from './pipes/study-download-request.pipe';
import { DatasetDownloadRequestPipe } from './pipes/dataset-download-request.pipe';
import { BasicCsvTransformer } from './pipes/csv/basic-field.pipe';
import { VideoCsvTransformer } from './pipes/csv/video-field.pipe';

@Module({
imports: [
Expand All @@ -42,7 +44,9 @@ import { DatasetDownloadRequestPipe } from './pipes/dataset-download-request.pip
StudyDownloadRequestResolver,
StudyDownloadService,
StudyDownloadRequestPipe,
DatasetDownloadRequestPipe
DatasetDownloadRequestPipe,
BasicCsvTransformer,
VideoCsvTransformer
]
})
export class DownloadRequestModule {}
17 changes: 17 additions & 0 deletions packages/server/src/download-request/pipes/csv/basic-field.pipe.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import { PipeTransform, Injectable } from '@nestjs/common';
import { CsvFieldTest } from '../../types/csv-field';

/**
 * Handles "transforming" CSV fields where the data itself is already
 * able to be represented as a string.
 *
 * Only `null` and `undefined` count as "no value" and become an empty string.
 * Other falsy values such as `0` and `false` are real data and are stringified
 * like everything else.
 */
@Injectable()
export class BasicCsvTransformer implements PipeTransform<any, Promise<string>> {
  /**
   * @param value Raw field value to place in a CSV cell
   * @returns The string form of `value`, or `''` when `value` is null/undefined
   */
  async transform(value: any): Promise<string> {
    // A plain truthiness check would blank out legitimate values like 0 or false
    if (value === null || value === undefined) {
      return '';
    }
    return String(value);
  }
}

/** Accepts every field: this test ignores both schemas and always returns true. */
export const basicCsvTest: CsvFieldTest = (_uischema, _schema) => true;
34 changes: 34 additions & 0 deletions packages/server/src/download-request/pipes/csv/video-field.pipe.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { PipeTransform, Injectable } from '@nestjs/common';
import { CsvFieldTest } from '../../types/csv-field';
import { EntryService } from '../../../entry/services/entry.service';

/**
 * Resolves an Entry ID to the file name of the entry it refers to.
 * A falsy value (for example null/undefined) produces an empty string.
 */
@Injectable()
export class VideoCsvTransformer implements PipeTransform<any, Promise<string>> {
  constructor(private readonly entryService: EntryService) {}

  /**
   * @param value ID of the entry to look up
   * @returns The last `/`-separated segment of the entry's bucket location, or `''`
   * @throws Error when no entry is found for `value`
   */
  async transform(value: any): Promise<string> {
    // Nothing to resolve
    if (!value) return '';

    const entry = await this.entryService.find(value);
    if (!entry) throw new Error(`Invalid entry id: ${value}`);

    // The file name is whatever follows the final '/' of the bucket location
    const segments = entry.bucketLocation.split('/');
    return segments[segments.length - 1] || '';
  }
}

/** Matches UI schema elements whose `options.customType` is exactly `'video'`. */
export const videoCsvTest: CsvFieldTest = (uischema, _schema) => uischema.options?.customType === 'video';
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,13 @@ import { TagService } from '../../tag/services/tag.service';
import { TagFieldType } from '../../tag/models/tag-field.model';
import { VideoFieldService } from '../../tag/services/video-field.service';
import { BucketObjectAction } from 'src/bucket/bucket';
import { Entry } from 'src/entry/models/entry.model';
import { Study } from 'src/study/study.model';
import { Entry } from '../../entry/models/entry.model';
import { Study } from '../../study/study.model';
import { randomUUID } from 'crypto';
import { CsvField } from '../types/csv-field';
import { videoCsvTest, VideoCsvTransformer } from '../pipes/csv/video-field.pipe';
import { basicCsvTest, BasicCsvTransformer } from '../pipes/csv/basic-field.pipe';
import { StudyService } from '../../study/study.service';

@Injectable()
export class StudyDownloadService {
Expand All @@ -37,7 +41,10 @@ export class StudyDownloadService {
private readonly bucketFactory: BucketFactory,
private readonly configService: ConfigService,
private readonly tagService: TagService,
private readonly videoFieldService: VideoFieldService
private readonly videoFieldService: VideoFieldService,
private readonly basicCsvTransformer: BasicCsvTransformer,
private readonly videoCsvTransformer: VideoCsvTransformer,
private readonly studyService: StudyService
) {}

async createDownloadRequest(
Expand Down Expand Up @@ -83,6 +90,7 @@ export class StudyDownloadService {
request = (await this.downloadRequestModel.findById(request._id))!;

// Download the entries that were generated as part of this study
/*
await this.downloadService.startZipJob({
entryJSONLocation: request.entryJSONLocation!,
entryZIPLocation: request.entryZIPLocation!,
Expand All @@ -100,9 +108,11 @@ export class StudyDownloadService {
bucket: (await this.bucketFactory.getBucket(request.organization))!,
organization: request.organization
});
*/
// Download the tag data as a CSV
await this.generateCSV(request);
// Download the entries that were tagged in this study
/*
await this.downloadService.startZipJob({
entryJSONLocation: request.taggedEntriesJSONLocation!,
entryZIPLocation: request.taggedEntriesZipLocation!,
Expand All @@ -119,7 +129,7 @@ export class StudyDownloadService {
entries: await this.getLabeledEntries(request),
bucket: (await this.bucketFactory.getBucket(request.organization))!,
organization: request.organization
});
}); */

return request;
}
Expand Down Expand Up @@ -181,33 +191,25 @@ export class StudyDownloadService {
*/
private async generateCSV(downloadRequest: StudyDownloadRequest): Promise<void> {
const tags = await this.tagService.getCompleteTags(downloadRequest.study);
const study = await this.studyService.findById(downloadRequest.study);
if (!study) {
throw new Error(`Study with id ${downloadRequest.study} not found`);
}

// Turn the tag fields into their "CSV-friendly" format
const converted: any[] = [];
// Convert the data into a CSV
const csvFields = await this.getFieldTransformers(study);
const headers = csvFields.map((csvField) => csvField.header).join(',');

let body = '';
for (const tag of tags) {
const tagFields: any = {};

// Add basic meta-fields
tagFields['prompt'] = (await this.entryService.find(tag.entry))!.bucketLocation.split('/').pop();

for (const field of tag.data!) {
// For video fields, each entry is represented by the filename
if (field.type == TagFieldType.VIDEO_RECORD) {
const videoField = (await this.videoFieldService.find(field.data))!;
for (let index = 0; index < videoField.entries.length; index++) {
const entryID = videoField.entries[index];
const entry = (await this.entryService.find(entryID))!;
tagFields[`${field.name}-${index}`] = entry.bucketLocation.split('/').pop();
}
} else {
tagFields[`${field.name}`] = field.data;
}
const row: string[] = [];
for (const csvField of csvFields) {
row.push(await csvField.convertField(tag));
}
converted.push(tagFields);
body = body + row.join(',') + '\n';
}

// Convert the data into a CSV
const dataString = this.convertToCSV(converted);
const dataString = headers + '\n' + body;

// Store the CSV in the expected location in the bucket
const bucket = await this.bucketFactory.getBucket(downloadRequest.organization);
Expand Down Expand Up @@ -237,19 +239,6 @@ export class StudyDownloadService {
return bucket.getSignedUrl(location, BucketObjectAction.READ, new Date(Date.now() + this.expiration));
}

/**
 * Serialise a list of flat records into CSV text. The header row is taken from
 * the keys of the first record; each record then contributes one row made of
 * its own values joined by commas.
 *
 * TODO: Improve the CSV process, need a better method to determine the headers and handle default values
 *
 * @param arr Records to serialise
 * @returns The CSV text, or an empty string when there are no records
 */
private convertToCSV(arr: any[]): string {
  // With no first record there is nothing to derive headers from, and
  // Object.keys(undefined) would throw
  if (arr.length === 0) {
    return '';
  }

  const headerRow = Object.keys(arr[0]);
  return [headerRow, ...arr].map((row) => Object.values(row).toString()).join('\n');
}

/**
* Get the entries tagged as part of the study
*/
Expand All @@ -272,4 +261,84 @@ export class StudyDownloadService {
})
);
}

/**
 * Get the list of CSV transformers that can convert the tag data.
 *
 * The first column is always `prompt` (file name of the tag's entry). After it,
 * each property of the study's data schema adds columns in schema order:
 * - video properties add one column per video slot, named `<property>-video-<n>`
 * - every other property adds a single column named after the property
 *
 * @param study Study whose tag schema determines the columns
 * @returns The column definitions in output order
 * @throws Error when a property has no matching data/UI schema, or cannot be
 *         represented in a CSV
 */
private async getFieldTransformers(study: Study): Promise<CsvField[]> {
  type TagValue = Parameters<CsvField['convertField']>[0];

  /** Find the tag field backing a schema property, failing loudly when it is absent */
  const requireTagField = (tag: TagValue, propertyName: string) => {
    const tagField = tag.data?.find((field) => field.name === propertyName);
    if (!tagField) {
      throw new Error(`Tag field ${propertyName} not found`);
    }
    return tagField;
  };

  const csvFields: CsvField[] = [];

  // Add the meta data converters
  csvFields.push({
    header: 'prompt',
    convertField: async (tag) => {
      const entry = await this.entryService.find(tag.entry);
      if (!entry) {
        throw new Error(`Entry with id ${tag.entry} not found`);
      }
      return entry.bucketLocation.split('/').pop() || '';
    }
  });

  // A schema without properties has no data columns; Object.getOwnPropertyNames(undefined) would throw
  const properties = study.tagSchema.dataSchema.properties;
  if (!properties) {
    return csvFields;
  }

  // Go through all the properties in the data schema
  for (const propertyName of Object.getOwnPropertyNames(properties)) {
    // Get the data schema and the ui schema
    const dataSchema = properties[propertyName];
    const uiSchema = study.tagSchema.uiSchema.elements.find(
      (element: any) => element.scope === `#/properties/${propertyName}`
    );

    if (!dataSchema || !uiSchema) {
      throw new Error(`Could not find schema for property ${propertyName}`);
    }

    // Now determine the proper way to represent the given field
    if (videoCsvTest(uiSchema, dataSchema)) {
      // videoCsvTest only passes when `options` is present
      const options = uiSchema.options!;

      // Fall back to the required count when no optional maximum is configured.
      // NOTE(review): `maximumOptional` is used here as the total number of slots; confirm
      // it is not meant to be added on top of `minimumRequired`.
      const videoCount = options.maximumOptional || options.minimumRequired;

      for (let slot = 0; slot < videoCount; slot++) {
        csvFields.push({
          header: `${propertyName}-video-${slot + 1}`,
          convertField: async (tag) => {
            // Get the corresponding tag field
            const tagField = requireTagField(tag, propertyName);

            // Get the video field
            const videoField = await this.videoFieldService.find(tagField.data);
            if (!videoField) {
              throw new Error(`Could not find video field ${tagField.data}`);
            }

            // Transform the video at this slot into a CSV friendly format
            return this.videoCsvTransformer.transform(videoField.entries[slot]);
          }
        });
      }
    } else if (basicCsvTest(uiSchema, dataSchema)) {
      csvFields.push({
        header: propertyName,
        convertField: async (tag) => this.basicCsvTransformer.transform(requireTagField(tag, propertyName).data)
      });
    } else {
      throw new Error(`Cannot convert property ${propertyName} into a CSV format`);
    }
  }

  return csvFields;
}
}
13 changes: 13 additions & 0 deletions packages/server/src/download-request/types/csv-field.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { JsonSchema, UISchemaElement } from '@jsonforms/core';
import { Tag } from '../../tag/models/tag.model';

/**
* Represents a column within a CSV. Keep track both of the header as well as how
* to get the field value from an object.
*/
export interface CsvField {
/** Text placed in the header row for this column */
header: string;
/** Produces this column's cell text for the given tag */
convertField: (value: Tag) => Promise<string>;
}

/**
* Predicate over a field's UI schema element and JSON schema. Returns true when
* the field matches the kind of CSV field the test was written for.
*/
export type CsvFieldTest = (uischema: UISchemaElement, schema: JsonSchema) => boolean;