From 79005f06846f89c5a18172a2c3a8a02888fdf6ce Mon Sep 17 00:00:00 2001 From: cbolles Date: Mon, 13 May 2024 11:10:29 -0400 Subject: [PATCH 1/4] Add in data types to handle CSV conversion --- .../download-request.module.ts | 6 ++++- .../pipes/csv/basic-field.pipe.ts | 12 +++++++++ .../pipes/csv/video-field.pipe.ts | 26 +++++++++++++++++++ .../src/download-request/types/csv-field.ts | 10 +++++++ 4 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 packages/server/src/download-request/pipes/csv/basic-field.pipe.ts create mode 100644 packages/server/src/download-request/pipes/csv/video-field.pipe.ts create mode 100644 packages/server/src/download-request/types/csv-field.ts diff --git a/packages/server/src/download-request/download-request.module.ts b/packages/server/src/download-request/download-request.module.ts index 700e4b0..b0f9a83 100644 --- a/packages/server/src/download-request/download-request.module.ts +++ b/packages/server/src/download-request/download-request.module.ts @@ -18,6 +18,8 @@ import { StudyDownloadService } from './services/study-download-request.service' import { TagModule } from '../tag/tag.module'; import { StudyDownloadRequestPipe } from './pipes/study-download-request.pipe'; import { DatasetDownloadRequestPipe } from './pipes/dataset-download-request.pipe'; +import { BasicCsvTransformer } from './pipes/csv/basic-field.pipe'; +import { VideoCsvTransformer } from './pipes/csv/video-field.pipe'; @Module({ imports: [ @@ -42,7 +44,9 @@ import { DatasetDownloadRequestPipe } from './pipes/dataset-download-request.pip StudyDownloadRequestResolver, StudyDownloadService, StudyDownloadRequestPipe, - DatasetDownloadRequestPipe + DatasetDownloadRequestPipe, + BasicCsvTransformer, + VideoCsvTransformer ] }) export class DownloadRequestModule {} diff --git a/packages/server/src/download-request/pipes/csv/basic-field.pipe.ts b/packages/server/src/download-request/pipes/csv/basic-field.pipe.ts new file mode 100644 index 0000000..3e58f11 --- 
/dev/null +++ b/packages/server/src/download-request/pipes/csv/basic-field.pipe.ts @@ -0,0 +1,12 @@ +import { PipeTransform, Injectable } from '@nestjs/common'; + +/** + * Handles "transforming" CSV fields where the data itself is already + * able to be represented as a string + */ +@Injectable() +export class BasicCsvTransformer implements PipeTransform> { + async transform(value: any): Promise { + return value.toString(); + } +} diff --git a/packages/server/src/download-request/pipes/csv/video-field.pipe.ts b/packages/server/src/download-request/pipes/csv/video-field.pipe.ts new file mode 100644 index 0000000..f973106 --- /dev/null +++ b/packages/server/src/download-request/pipes/csv/video-field.pipe.ts @@ -0,0 +1,26 @@ +import { PipeTransform, Injectable } from '@nestjs/common'; +import { EntryService } from '../../../entry/services/entry.service'; + +/** + * Handles converting an Entry ID into the filename. If the value is null/undefined, then + * an empty string is returned. + */ +@Injectable() +export class VideoCsvTransformer implements PipeTransform> { + constructor(private readonly entryService: EntryService) {} + + async transform(value: any): Promise { + if (!value) { + return ''; + } + + // Otherwise try to get an entry + const entry = await this.entryService.find(value); + if (!entry) { + throw new Error(`Invalid entry id: ${value}`); + } + + // Now grab just the file name from the entry's bucket location + return entry.bucketLocation.split('/').pop() || ''; + } +} diff --git a/packages/server/src/download-request/types/csv-field.ts b/packages/server/src/download-request/types/csv-field.ts new file mode 100644 index 0000000..c723b79 --- /dev/null +++ b/packages/server/src/download-request/types/csv-field.ts @@ -0,0 +1,10 @@ +import { PipeTransform } from '@nestjs/common'; + +/** + * Represents a column within a CSV. Keep track both of the header as well as how + * to get the field value from an object. 
+ */ +export interface CsvField { + header: string; + convertField: PipeTransform>; +} From 636b29e53437164de23f072afb2d73fc69597a14 Mon Sep 17 00:00:00 2001 From: cbolles Date: Mon, 13 May 2024 13:02:48 -0400 Subject: [PATCH 2/4] Add more involved method of converting to CSV --- .../pipes/csv/basic-field.pipe.ts | 5 + .../pipes/csv/video-field.pipe.ts | 8 ++ .../study-download-request.service.ts | 93 ++++++++++++++++++- .../src/download-request/types/csv-field.ts | 7 +- 4 files changed, 107 insertions(+), 6 deletions(-) diff --git a/packages/server/src/download-request/pipes/csv/basic-field.pipe.ts b/packages/server/src/download-request/pipes/csv/basic-field.pipe.ts index 3e58f11..2196656 100644 --- a/packages/server/src/download-request/pipes/csv/basic-field.pipe.ts +++ b/packages/server/src/download-request/pipes/csv/basic-field.pipe.ts @@ -1,4 +1,5 @@ import { PipeTransform, Injectable } from '@nestjs/common'; +import { CsvFieldTest } from '../../types/csv-field'; /** * Handles "transforming" CSV fields where the data itself is already @@ -10,3 +11,7 @@ export class BasicCsvTransformer implements PipeTransform> return value.toString(); } } + +export const basicCsvTest: CsvFieldTest = (_uischema, _schema) => { + return true; +}; diff --git a/packages/server/src/download-request/pipes/csv/video-field.pipe.ts b/packages/server/src/download-request/pipes/csv/video-field.pipe.ts index f973106..042c737 100644 --- a/packages/server/src/download-request/pipes/csv/video-field.pipe.ts +++ b/packages/server/src/download-request/pipes/csv/video-field.pipe.ts @@ -1,4 +1,5 @@ import { PipeTransform, Injectable } from '@nestjs/common'; +import { CsvFieldTest } from '../../types/csv-field'; import { EntryService } from '../../../entry/services/entry.service'; /** @@ -24,3 +25,10 @@ export class VideoCsvTransformer implements PipeTransform> return entry.bucketLocation.split('/').pop() || ''; } } + +export const videoCsvTest: CsvFieldTest = (uischema, _schema) => { + if 
(uischema.options && uischema.options.customType && uischema.options.customType === 'video') { + return true; + } + return false; +}; diff --git a/packages/server/src/download-request/services/study-download-request.service.ts b/packages/server/src/download-request/services/study-download-request.service.ts index 55efa7b..15d9f41 100644 --- a/packages/server/src/download-request/services/study-download-request.service.ts +++ b/packages/server/src/download-request/services/study-download-request.service.ts @@ -13,9 +13,13 @@ import { TagService } from '../../tag/services/tag.service'; import { TagFieldType } from '../../tag/models/tag-field.model'; import { VideoFieldService } from '../../tag/services/video-field.service'; import { BucketObjectAction } from 'src/bucket/bucket'; -import { Entry } from 'src/entry/models/entry.model'; -import { Study } from 'src/study/study.model'; +import { Entry } from '../../entry/models/entry.model'; +import { Study } from '../../study/study.model'; import { randomUUID } from 'crypto'; +import { CsvField } from '../types/csv-field'; +import { videoCsvTest, VideoCsvTransformer } from '../pipes/csv/video-field.pipe'; +import { basicCsvTest, BasicCsvTransformer } from '../pipes/csv/basic-field.pipe'; +import { StudyService } from '../../study/study.service'; @Injectable() export class StudyDownloadService { @@ -37,7 +41,10 @@ export class StudyDownloadService { private readonly bucketFactory: BucketFactory, private readonly configService: ConfigService, private readonly tagService: TagService, - private readonly videoFieldService: VideoFieldService + private readonly videoFieldService: VideoFieldService, + private readonly basicCsvTransformer: BasicCsvTransformer, + private readonly videoCsvTransformer: VideoCsvTransformer, + private readonly studyService: StudyService ) {} async createDownloadRequest( @@ -181,6 +188,10 @@ export class StudyDownloadService { */ private async generateCSV(downloadRequest: StudyDownloadRequest): Promise 
{ const tags = await this.tagService.getCompleteTags(downloadRequest.study); + const study = await this.studyService.findById(downloadRequest.study); + if (!study) { + throw new Error(`Study with id ${downloadRequest.study} not found`); + } // Turn the tag fields into their "CSV-friendly" format const converted: any[] = []; @@ -207,7 +218,14 @@ export class StudyDownloadService { } // Convert the data into a CSV - const dataString = this.convertToCSV(converted); + const csvFields = await this.getFieldTransformers(study); + const headers = csvFields.map((csvField) => csvField.header).join(','); + + const body = tags.map((tag) => { + return csvFields.map((csvField) => csvField.convertField(tag)).join(','); + }).join('\n'); + + const dataString = headers + '\n' + body; // Store the CSV in the expected location in the bucket const bucket = await this.bucketFactory.getBucket(downloadRequest.organization); @@ -272,4 +290,71 @@ export class StudyDownloadService { }) ); } + + /** Get the list of CSV tranformers that can convert the tag data */ + private async getFieldTransformers(study: Study): Promise { + // Go through all the properties in the data schema + const propertyNames = Object.getOwnPropertyNames(study.tagSchema.dataSchema); + + const csvFields: CsvField[] = []; + + for(const propertyName of propertyNames) { + // Get the data schema and the ui schema + const dataSchema = study.tagSchema.dataSchema.properties![propertyName]; + const uiSchema = study.tagSchema.uiSchema.elements.find( + (element: any) => element.scope === `#/properties/${propertyName}` + ); + if (!dataSchema || !uiSchema) { + throw new Error(`Could not find schema for property ${propertyName}`); + } + + // Now determine the proper way to represent the given field + if (videoCsvTest(uiSchema, dataSchema)) { + const minVideos = uiSchema.options!.minimumRequired!; + + let maxVideos = uiSchema.options!.maximumRequired; + if (!maxVideos) { + maxVideos = minVideos; + } + + for (let i = 0; i < maxVideos; 
i++) { + csvFields.push({ + header: `${propertyName}-video-${i + 1}`, + convertField: async (tag) => { + // Get the corresponding tag field + const tagField = tag.data?.find((field) => field.name == propertyName); + if (!tagField) { + throw new Error(`Tag field ${propertyName} not found`); + } + + // Get the video field + const videoField = await this.videoFieldService.find(tagField.data); + if (!videoField) { + throw new Error(`Could not find video field ${tagField.data}`); + } + + // Transform the video field at the given index into a CSV friendly format + return this.videoCsvTransformer.transform(videoField.entries[i]); + } + }); + } + + } else if (basicCsvTest(uiSchema, dataSchema)) { + csvFields.push({ + header: propertyName, + convertField: (tag) => { + const tagField = tag.data?.find((field) => field.name == propertyName); + if (!tagField) { + throw new Error(`Tag field ${propertyName} not found`); + } + return this.basicCsvTransformer.transform(tagField.data); + } + }); + } else { + throw new Error(`Cannot convert property ${propertyName} into a CSV format`); + } + } + + return csvFields; + } } diff --git a/packages/server/src/download-request/types/csv-field.ts b/packages/server/src/download-request/types/csv-field.ts index c723b79..27e1d2a 100644 --- a/packages/server/src/download-request/types/csv-field.ts +++ b/packages/server/src/download-request/types/csv-field.ts @@ -1,4 +1,5 @@ -import { PipeTransform } from '@nestjs/common'; +import { JsonSchema, UISchemaElement } from '@jsonforms/core'; +import { Tag } from '../../tag/models/tag.model'; /** * Represents a column within a CSV. 
Keep track both of the header as well as how @@ -6,5 +7,7 @@ import { PipeTransform } from '@nestjs/common'; */ export interface CsvField { header: string; - convertField: PipeTransform>; + convertField: (value: Tag) => Promise; } + +export type CsvFieldTest = (uischema: UISchemaElement, schema: JsonSchema) => boolean; From 08ced1728a636a1a9911c1810f73af1e9ad605f2 Mon Sep 17 00:00:00 2001 From: cbolles Date: Mon, 13 May 2024 13:45:28 -0400 Subject: [PATCH 3/4] Working CSV download --- .../pipes/csv/basic-field.pipe.ts | 2 +- .../study-download-request.service.ts | 78 ++++++++----------- 2 files changed, 33 insertions(+), 47 deletions(-) diff --git a/packages/server/src/download-request/pipes/csv/basic-field.pipe.ts b/packages/server/src/download-request/pipes/csv/basic-field.pipe.ts index 2196656..29ef6e7 100644 --- a/packages/server/src/download-request/pipes/csv/basic-field.pipe.ts +++ b/packages/server/src/download-request/pipes/csv/basic-field.pipe.ts @@ -8,7 +8,7 @@ import { CsvFieldTest } from '../../types/csv-field'; @Injectable() export class BasicCsvTransformer implements PipeTransform> { async transform(value: any): Promise { - return value.toString(); + return value ? 
value.toString() : ''; } } diff --git a/packages/server/src/download-request/services/study-download-request.service.ts b/packages/server/src/download-request/services/study-download-request.service.ts index 15d9f41..3febecd 100644 --- a/packages/server/src/download-request/services/study-download-request.service.ts +++ b/packages/server/src/download-request/services/study-download-request.service.ts @@ -90,6 +90,7 @@ export class StudyDownloadService { request = (await this.downloadRequestModel.findById(request._id))!; // Download the entries that were generated as part of this study + /* await this.downloadService.startZipJob({ entryJSONLocation: request.entryJSONLocation!, entryZIPLocation: request.entryZIPLocation!, @@ -107,9 +108,11 @@ export class StudyDownloadService { bucket: (await this.bucketFactory.getBucket(request.organization))!, organization: request.organization }); + */ // Download the tag data as a CSV await this.generateCSV(request); // Download the entries that were tagged in this study + /* await this.downloadService.startZipJob({ entryJSONLocation: request.taggedEntriesJSONLocation!, entryZIPLocation: request.taggedEntriesZipLocation!, @@ -126,7 +129,7 @@ export class StudyDownloadService { entries: await this.getLabeledEntries(request), bucket: (await this.bucketFactory.getBucket(request.organization))!, organization: request.organization - }); + }); */ return request; } @@ -193,37 +196,18 @@ export class StudyDownloadService { throw new Error(`Study with id ${downloadRequest.study} not found`); } - // Turn the tag fields into their "CSV-friendly" format - const converted: any[] = []; - for (const tag of tags) { - const tagFields: any = {}; - - // Add basic meta-fields - tagFields['prompt'] = (await this.entryService.find(tag.entry))!.bucketLocation.split('/').pop(); - - for (const field of tag.data!) 
{ - // For video fields, each entry is represented by the filename - if (field.type == TagFieldType.VIDEO_RECORD) { - const videoField = (await this.videoFieldService.find(field.data))!; - for (let index = 0; index < videoField.entries.length; index++) { - const entryID = videoField.entries[index]; - const entry = (await this.entryService.find(entryID))!; - tagFields[`${field.name}-${index}`] = entry.bucketLocation.split('/').pop(); - } - } else { - tagFields[`${field.name}`] = field.data; - } - } - converted.push(tagFields); - } - // Convert the data into a CSV const csvFields = await this.getFieldTransformers(study); const headers = csvFields.map((csvField) => csvField.header).join(','); - const body = tags.map((tag) => { - return csvFields.map((csvField) => csvField.convertField(tag)).join(','); - }).join('\n'); + let body = ''; + for (const tag of tags) { + const row: string[] = []; + for (const csvField of csvFields) { + row.push(await csvField.convertField(tag)); + } + body = body + row.join(',') + '\n'; + } const dataString = headers + '\n' + body; @@ -255,19 +239,6 @@ export class StudyDownloadService { return bucket.getSignedUrl(location, BucketObjectAction.READ, new Date(Date.now() + this.expiration)); } - /** - * TODO: Improve the CSV process, need a better method to determine the headers and handle default values - */ - private convertToCSV(arr: any[]): string { - const array = [Object.keys(arr[0])].concat(arr); - - return array - .map((it) => { - return Object.values(it).toString(); - }) - .join('\n'); - } - /** * Get the entries taged as part of the study */ @@ -293,10 +264,23 @@ export class StudyDownloadService { /** Get the list of CSV tranformers that can convert the tag data */ private async getFieldTransformers(study: Study): Promise { + const csvFields: CsvField[] = []; + + // Add the meta data converts + csvFields.push({ + header: 'prompt', + convertField: async (tag) => { + const entry = await this.entryService.find(tag.entry); + if (!entry) 
{ + throw new Error(`Entry with id ${tag.entry} not found`); + } + return entry.bucketLocation.split('/').pop() || ''; + } + }) + // Go through all the properties in the data schema - const propertyNames = Object.getOwnPropertyNames(study.tagSchema.dataSchema); + const propertyNames = Object.getOwnPropertyNames(study.tagSchema.dataSchema.properties); - const csvFields: CsvField[] = []; for(const propertyName of propertyNames) { // Get the data schema and the ui schema @@ -304,6 +288,7 @@ export class StudyDownloadService { const uiSchema = study.tagSchema.uiSchema.elements.find( (element: any) => element.scope === `#/properties/${propertyName}` ); + if (!dataSchema || !uiSchema) { throw new Error(`Could not find schema for property ${propertyName}`); } @@ -312,7 +297,7 @@ export class StudyDownloadService { if (videoCsvTest(uiSchema, dataSchema)) { const minVideos = uiSchema.options!.minimumRequired!; - let maxVideos = uiSchema.options!.maximumRequired; + let maxVideos = uiSchema.options!.maximumOptional; if (!maxVideos) { maxVideos = minVideos; } @@ -342,12 +327,13 @@ export class StudyDownloadService { } else if (basicCsvTest(uiSchema, dataSchema)) { csvFields.push({ header: propertyName, - convertField: (tag) => { + convertField: async (tag) => { const tagField = tag.data?.find((field) => field.name == propertyName); if (!tagField) { throw new Error(`Tag field ${propertyName} not found`); } - return this.basicCsvTransformer.transform(tagField.data); + + return await this.basicCsvTransformer.transform(tagField.data); } }); } else { From a3f2012b43e846a26dd1731754107730a497b7cd Mon Sep 17 00:00:00 2001 From: cbolles Date: Mon, 13 May 2024 13:45:48 -0400 Subject: [PATCH 4/4] fix formatting --- .../services/study-download-request.service.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/packages/server/src/download-request/services/study-download-request.service.ts 
b/packages/server/src/download-request/services/study-download-request.service.ts index 3febecd..7e3475a 100644 --- a/packages/server/src/download-request/services/study-download-request.service.ts +++ b/packages/server/src/download-request/services/study-download-request.service.ts @@ -276,13 +276,12 @@ export class StudyDownloadService { } return entry.bucketLocation.split('/').pop() || ''; } - }) + }); // Go through all the properties in the data schema const propertyNames = Object.getOwnPropertyNames(study.tagSchema.dataSchema.properties); - - for(const propertyName of propertyNames) { + for (const propertyName of propertyNames) { // Get the data schema and the ui schema const dataSchema = study.tagSchema.dataSchema.properties![propertyName]; const uiSchema = study.tagSchema.uiSchema.elements.find( @@ -323,7 +322,6 @@ export class StudyDownloadService { } }); } - } else if (basicCsvTest(uiSchema, dataSchema)) { csvFields.push({ header: propertyName,