Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 0 additions & 15 deletions .eslintrc.js

This file was deleted.

5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -204,4 +204,7 @@ $RECYCLE.BIN/
*.js
!.eslintrc.js
*.js.map
samconfig.toml
samconfig.toml
*.d.ts
.vscode/

11 changes: 5 additions & 6 deletions bin/summarise.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/usr/bin/env ./node_modules/.bin/ts-node

import { readFileSync } from 'fs'
import { readFileSync } from 'node:fs'
import { createSummary } from '../lib/summarisation'

import yargs from 'yargs'
import { Transcript } from '../lib/types'
import type { Transcript } from '../lib/types'

interface Argv {
transcriptFilePath: string
Expand All @@ -15,20 +15,19 @@ const argv: Argv = yargs
.usage('Usage: $0 <transcriptFilePath>')
.command('$0 <transcriptFilePath>', 'path to the transcript JSON file')
.options('bedrock-region', {
default: 'us-east-1'
default: 'us-east-1',
})
.alias('h', 'help')
.help('help')
.demandCommand(1, 'You need to provide a filename argument.')
.argv as unknown as Argv
.demandCommand(1, 'You need to provide a filename argument.').argv as unknown as Argv

const filename: string = argv.transcriptFilePath
const transcriptContents = readFileSync(filename, 'utf8')
const transcript = JSON.parse(transcriptContents)

summarise(transcript)

async function summarise (transcript: Transcript): Promise<void> {
async function summarise(transcript: Transcript): Promise<void> {
const summary = await createSummary(transcript, { bedrockRegion: argv.bedrockRegion })
console.log(JSON.stringify(summary, null, 4))
}
33 changes: 33 additions & 0 deletions biome.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
"vcs": {
"enabled": true,
"clientKind": "git",
"useIgnoreFile": true
},
"files": {
"ignoreUnknown": false,
"ignore": []
},
"formatter": {
"enabled": true,
"indentStyle": "space",
"indentWidth": 2,
"lineWidth": 120
},
"organizeImports": {
"enabled": true
},
"linter": {
"enabled": true,
"rules": {
"recommended": true
}
},
"javascript": {
"formatter": {
"quoteStyle": "single",
"semicolons": "asNeeded"
}
}
}
2 changes: 1 addition & 1 deletion events/sample-pr-event.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@
]
},
"transcriptKey": "processed-transcripts/998.json"
}
}
35 changes: 22 additions & 13 deletions functions/pull-request/app.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import path from 'node:path'
import { tmpdir } from 'node:os'
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'
import { simpleGit } from 'simple-git'
import { SSMClient, GetParameterCommand } from '@aws-sdk/client-ssm'
import { tmpdir } from 'node:os'
import path from 'node:path'
import { S3Client } from '@aws-sdk/client-s3'
import { GetParameterCommand, SSMClient } from '@aws-sdk/client-ssm'
import { Octokit } from 'octokit'
import { simpleGit } from 'simple-git'

import envs from '../../lib/envs'
import type { Summary } from '../../lib/types'
import { logger, middify } from '../../lib/lambda-common'
import type { Summary } from '../../lib/types'
import { getS3JSON } from '../../lib/utils'
import { createPullRequestDescription } from './pr-description'

Expand All @@ -21,10 +21,12 @@ const ssmClient = new SSMClient({})
const s3Client = new S3Client({})

// A personal access token is used to clone and push from/to GitHub
const gitHubUserCredentialsPromise = ssmClient.send(new GetParameterCommand({
Name: GIT_HUB_CREDENTIALS_SSM_PARAMETER,
WithDecryption: true
}))
const gitHubUserCredentialsPromise = ssmClient.send(
new GetParameterCommand({
Name: GIT_HUB_CREDENTIALS_SSM_PARAMETER,
WithDecryption: true,
}),
)

interface PullRequestEvent {
transcriptKey: string
Expand Down Expand Up @@ -59,7 +61,9 @@ export const handleEvent = middify(async (event: PullRequestEvent) => {
}
const [username, password] = gitHubUserCredentials.split(':')
if (username === undefined || password === undefined || username === '' || password === '') {
throw new Error(`${GIT_HUB_CREDENTIALS_SSM_PARAMETER} SSM Parameter should be in the format <Username>:<GitHubPersonalAccessToken>`)
throw new Error(
`${GIT_HUB_CREDENTIALS_SSM_PARAMETER} SSM Parameter should be in the format <Username>:<GitHubPersonalAccessToken>`,
)
}

const gitUrl = new URL(GIT_REPO_URL)
Expand Down Expand Up @@ -97,9 +101,14 @@ export const handleEvent = middify(async (event: PullRequestEvent) => {
const base = TARGET_BRANCH === undefined ? 'main' : TARGET_BRANCH
const repoPath = gitUrl.pathname
const [, owner] = repoPath.split('/')
const response = await octokit.request(
`POST /repos/${owner}/${repoName}/pulls`, { owner, title, body, head, base, repo: repoName }
)
const response = await octokit.request(`POST /repos/${owner}/${repoName}/pulls`, {
owner,
title,
body,
head,
base,
repo: repoName,
})
const prUrl = response.data.html_url
console.log('Created PR', { prUrl })
return { prUrl }
Expand Down
10 changes: 5 additions & 5 deletions functions/pull-request/pr-description.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import { Summary } from '../../lib/types'
import type { Summary } from '../../lib/types'

/**
* Convert decimal seconds to mm:ss format
*
* @param seconds seconds
* @returns a string like '00:00' or '123:59'
*/
export function secondsToYtTime (seconds: number): string {
export function secondsToYtTime(seconds: number): string {
const hours = String(Math.floor(seconds / 3600))
const minutes = String(Math.floor(seconds % 3600 / 60))
const minutes = String(Math.floor((seconds % 3600) / 60))
const roundedSeconds = String(Math.floor(seconds % 60))
let res = ''
if (hours !== '0') {
Expand All @@ -21,8 +21,8 @@ export function secondsToYtTime (seconds: number): string {
/**
* Create a PR markdown description from the generated summary of the episode and chapters
*/
export function createPullRequestDescription (summary: Summary): string {
const ytChapters = summary.chapters.map(c => `${secondsToYtTime(c.startTimestamp)} ${c.summary}`).join('\n')
export function createPullRequestDescription(summary: Summary): string {
const ytChapters = summary.chapters.map((c) => `${secondsToYtTime(c.startTimestamp)} ${c.summary}`).join('\n')
return `
# Transcript

Expand Down
8 changes: 4 additions & 4 deletions functions/summary/app.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import path from 'node:path'
import { S3Client } from '@aws-sdk/client-s3'
import { MetricUnits } from '@aws-lambda-powertools/metrics'
import { S3Client } from '@aws-sdk/client-s3'

import envs from '../../lib/envs'
import { logger, metrics, middify, tracer } from '../../lib/lambda-common'
import { getS3JSON } from '../../lib/utils'
import { createSummary } from '../../lib/summarisation'
import { Summary, Transcript } from '../../lib/types'
import type { Summary, Transcript } from '../../lib/types'
import { getS3JSON } from '../../lib/utils'

const { BUCKET_NAME } = envs

Expand Down Expand Up @@ -36,6 +36,6 @@ export const handleEvent = middify(async (event: SummarisationEvent): Promise<Su
metrics.addMetric('ChapterCount', MetricUnits.Count, summary.chapters.length)

return {
summary
summary,
}
})
21 changes: 12 additions & 9 deletions lib/envs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@ type Environment = Record<string, string>
* `env.BUCKET_NAME` will throw an Error if BUCKET_NAME is not defined in the environment.
* This eliminates the need to check the existence of each environment variable where it is used.
*/
const envProxy: Environment = new Proxy({}, {
get (_target: Record<string, string>, name: string): string {
const value = process.env[name]
if (value === undefined) {
throw new Error(`Environment variable ${name} is not set`)
}
return value
}
})
const envProxy: Environment = new Proxy(
{},
{
get(_target: Record<string, string>, name: string): string {
const value = process.env[name]
if (value === undefined) {
throw new Error(`Environment variable ${name} is not set`)
}
return value
},
},
)

export default envProxy
12 changes: 9 additions & 3 deletions lib/lambda-common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,28 @@ import { Logger, injectLambdaContext } from '@aws-lambda-powertools/logger'
import { Metrics, logMetrics } from '@aws-lambda-powertools/metrics'
import { Tracer, captureLambdaHandler } from '@aws-lambda-powertools/tracer'
import middy from '@middy/core'
import { Handler } from 'aws-lambda'
import type { Context } from 'aws-lambda'

// Exported powertools instances for use anywhere within a Lambda function implementation
export const logger = new Logger()
export const tracer = new Tracer()
export const metrics = new Metrics()

export type LambdaPromiseHandler<TEvent, TResult> = (event: TEvent, context: Context) => Promise<TResult>

/**
* Create a wrapped Lambda Function handler with injected powertools logger, tracer and metrics
*
* @param handler The undecorated Lambda Function handler
* @returns A 'middified' handler
*/
export const middify = (handler: Handler): Handler => {
return middy(handler)

export const middify = <TEvent, TResult>(
handler: LambdaPromiseHandler<TEvent, TResult>,
): LambdaPromiseHandler<TEvent, TResult> => {
return middy<TEvent, TResult>()
.use(injectLambdaContext(logger, { logEvent: true }))
.use(logMetrics(metrics))
.use(captureLambdaHandler(tracer))
.handler(handler)
}
4 changes: 2 additions & 2 deletions lib/prompt-template.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Transcript } from './types'
import type { Transcript } from './types'

export function createPrompt (transcript: Transcript): string {
export function createPrompt(transcript: Transcript): string {
return `Human: Please provide a friendly, positive episode summary (first-person plural and at least 120 words),
followed by at least 10 chapter summaries for the following podcast transcript JSON.
The transcript segments have start and end time in floating point seconds.
Expand Down
15 changes: 10 additions & 5 deletions lib/summarisation.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { BedrockRuntimeClient, InvokeModelCommand } from '@aws-sdk/client-bedrock-runtime'
import { Summary, Transcript } from './types'
import envs from './envs'
import { createPrompt } from './prompt-template'
import type { Summary, Transcript } from './types'

const MODEL_ID = 'anthropic.claude-v2'

Expand All @@ -10,9 +10,14 @@ const MODEL_ID = 'anthropic.claude-v2'
*
* @param transcript The episode transcript
*/
export async function createSummary (transcript: Transcript, options: { bedrockRegion?: string } = {}): Promise<Summary> {
export async function createSummary(
transcript: Transcript,
options: { bedrockRegion?: string } = {},
): Promise<Summary> {
const { bedrockRegion } = options
const brClient = new BedrockRuntimeClient({ region: bedrockRegion === undefined ? envs.BEDROCK_REGION : bedrockRegion })
const brClient = new BedrockRuntimeClient({
region: bedrockRegion === undefined ? envs.BEDROCK_REGION : bedrockRegion,
})

const prompt = createPrompt(transcript)
const modelInput = JSON.stringify({
Expand All @@ -21,14 +26,14 @@ export async function createSummary (transcript: Transcript, options: { bedrockR
temperature: 0.5,
top_k: 250,
top_p: 1,
stop_sequences: []
stop_sequences: [],
})

const invokeModelCommand = new InvokeModelCommand({
body: modelInput,
modelId: MODEL_ID,
accept: 'application/json',
contentType: 'application/json'
contentType: 'application/json',
})

const modelResponse = await brClient.send(invokeModelCommand)
Expand Down
20 changes: 11 additions & 9 deletions lib/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Readable } from 'node:stream'
import type { Readable } from 'node:stream'
import { GetObjectCommand, type S3Client } from '@aws-sdk/client-s3'
import { logger } from './lambda-common'

Expand All @@ -10,16 +10,18 @@ import { logger } from './lambda-common'
* @param key The S3 object key
* @returns The retrieved JSON as an object
*/
export async function getS3JSON<T = any> (s3Client: S3Client, bucket: string, key: string): Promise<T> {
export async function getS3JSON<T = unknown>(s3Client: S3Client, bucket: string, key: string): Promise<T> {
logger.info('Getting object', { bucket, key })
const response = await s3Client.send(new GetObjectCommand({
Bucket: bucket,
Key: key
}))
const response = await s3Client.send(
new GetObjectCommand({
Bucket: bucket,
Key: key,
}),
)

const chunks = []
for await (const chunk of response.Body as any as Readable) {
chunks.push(chunk as never)
const chunks: Uint8Array[] = []
for await (const chunk of response.Body as unknown as Readable) {
chunks.push(chunk as Uint8Array)
}

return JSON.parse(Buffer.concat(chunks).toString('utf-8'))
Expand Down
Loading