-
Notifications
You must be signed in to change notification settings - Fork 100
add s3 api to adapt cdc log (#456) #463
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -46,6 +46,17 @@ type s3Handlers interface { | |
| ListObjectsWithContext(context.Context, *s3.ListObjectsInput, ...request.Option) (*s3.ListObjectsOutput, error) | ||
| HeadBucketWithContext(context.Context, *s3.HeadBucketInput, ...request.Option) (*s3.HeadBucketOutput, error) | ||
| WaitUntilObjectExistsWithContext(context.Context, *s3.HeadObjectInput, ...request.WaiterOption) error | ||
|
|
||
| ListObjectsV2WithContext(context.Context, *s3.ListObjectsV2Input, ...request.Option) (*s3.ListObjectsV2Output, error) | ||
| CreateMultipartUploadWithContext( | ||
| context.Context, | ||
| *s3.CreateMultipartUploadInput, | ||
| ...request.Option) (*s3.CreateMultipartUploadOutput, error) | ||
| CompleteMultipartUploadWithContext( | ||
| context.Context, | ||
| *s3.CompleteMultipartUploadInput, | ||
| ...request.Option) (*s3.CompleteMultipartUploadOutput, error) | ||
| UploadPartWithContext(context.Context, *s3.UploadPartInput, ...request.Option) (*s3.UploadPartOutput, error) | ||
| } | ||
|
|
||
| // S3Storage info for s3 storage. | ||
|
|
@@ -55,6 +66,50 @@ type S3Storage struct { | |
| options *backup.S3 | ||
| } | ||
|
|
||
| // S3Uploader does multi-part upload to s3. | ||
| type S3Uploader struct { | ||
| svc s3Handlers | ||
| createOutput *s3.CreateMultipartUploadOutput | ||
| completeParts []*s3.CompletedPart | ||
| } | ||
|
|
||
| // UploadPart update partial data to s3, we should call CreateMultipartUpload to start it, | ||
| // and call CompleteMultipartUpload to finish it. | ||
| func (u *S3Uploader) UploadPart(ctx context.Context, data []byte) error { | ||
| partInput := &s3.UploadPartInput{ | ||
| Body: bytes.NewReader(data), | ||
| Bucket: u.createOutput.Bucket, | ||
| Key: u.createOutput.Key, | ||
| PartNumber: aws.Int64(int64(len(u.completeParts) + 1)), | ||
| UploadId: u.createOutput.UploadId, | ||
| ContentLength: aws.Int64(int64(len(data))), | ||
| } | ||
|
|
||
| uploadResult, err := u.svc.UploadPartWithContext(ctx, partInput) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| u.completeParts = append(u.completeParts, &s3.CompletedPart{ | ||
| ETag: uploadResult.ETag, | ||
| PartNumber: partInput.PartNumber, | ||
| }) | ||
| return nil | ||
| } | ||
|
|
||
| // CompleteUpload complete multi upload request. | ||
| func (u *S3Uploader) CompleteUpload(ctx context.Context) error { | ||
| completeInput := &s3.CompleteMultipartUploadInput{ | ||
| Bucket: u.createOutput.Bucket, | ||
| Key: u.createOutput.Key, | ||
| UploadId: u.createOutput.UploadId, | ||
| MultipartUpload: &s3.CompletedMultipartUpload{ | ||
| Parts: u.completeParts, | ||
| }, | ||
| } | ||
| _, err := u.svc.CompleteMultipartUploadWithContext(ctx, completeInput) | ||
| return err | ||
| } | ||
|
|
||
| // S3BackendOptions contains options for s3 storage. | ||
| type S3BackendOptions struct { | ||
| Endpoint string `json:"endpoint" toml:"endpoint"` | ||
|
|
@@ -70,7 +125,8 @@ type S3BackendOptions struct { | |
| UseAccelerateEndpoint bool `json:"use-accelerate-endpoint" toml:"use-accelerate-endpoint"` | ||
| } | ||
|
|
||
| func (options *S3BackendOptions) apply(s3 *backup.S3) error { | ||
| // Apply apply s3 options on backup.S3. | ||
| func (options *S3BackendOptions) Apply(s3 *backup.S3) error { | ||
| if options.Region == "" { | ||
| options.Region = "us-east-1" | ||
| } | ||
|
|
@@ -161,8 +217,8 @@ func (options *S3BackendOptions) parseFromFlags(flags *pflag.FlagSet) error { | |
| return nil | ||
| } | ||
|
|
||
| // newS3Storage initialize a new s3 storage for metadata. | ||
| func newS3Storage( // revive:disable-line:flag-parameter | ||
| // NewS3Storage initialize a new s3 storage for metadata. | ||
| func NewS3Storage( // revive:disable-line:flag-parameter | ||
| backend *backup.S3, | ||
| sendCredential bool, | ||
| ) (*S3Storage, error) { | ||
|
|
@@ -307,13 +363,17 @@ func (rs *S3Storage) FileExists(ctx context.Context, file string) (bool, error) | |
| // The first argument is the file path that can be used in `Open` | ||
| // function; the second argument is the size in byte of the file determined | ||
| // by path. | ||
| func (rs *S3Storage) WalkDir(ctx context.Context, fn func(string, int64) error) error { | ||
| func (rs *S3Storage) WalkDir(ctx context.Context, dir string, listCount int64, fn func(string, int64) error) error { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As the
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll move these parameters in ctx after this PR merged, because this PR is a cherry-pick PR. |
||
| var marker *string | ||
| prefix := rs.options.Prefix + dir | ||
| maxKeys := int64(1000) | ||
| if listCount > 0 { | ||
| maxKeys = listCount | ||
| } | ||
| req := &s3.ListObjectsInput{ | ||
| Bucket: &rs.options.Bucket, | ||
| Prefix: &rs.options.Prefix, | ||
| MaxKeys: &maxKeys, | ||
| Bucket: aws.String(rs.options.Bucket), | ||
| Prefix: aws.String(prefix), | ||
| MaxKeys: aws.Int64(maxKeys), | ||
| } | ||
| for { | ||
| req.Marker = marker | ||
|
|
@@ -440,3 +500,20 @@ func (r *s3ObjectReader) Seek(offset int64, whence int) (int64, error) { | |
| r.pos = realOffset | ||
| return realOffset, nil | ||
| } | ||
|
|
||
| // CreateUploader create multi upload request. | ||
| func (rs *S3Storage) CreateUploader(ctx context.Context, name string) (Uploader, error) { | ||
| input := &s3.CreateMultipartUploadInput{ | ||
| Bucket: aws.String(rs.options.Bucket), | ||
| Key: aws.String(rs.options.Prefix + name), | ||
| } | ||
| resp, err := rs.svc.CreateMultipartUploadWithContext(ctx, input) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
| return &S3Uploader{ | ||
| svc: rs.svc, | ||
| createOutput: resp, | ||
| completeParts: make([]*s3.CompletedPart, 0, 128), | ||
| }, nil | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,14 @@ type ReadSeekCloser interface { | |
| io.Closer | ||
| } | ||
|
|
||
| // Uploader upload file with chunks. | ||
| type Uploader interface { | ||
| // UploadPart upload part of file data to storage | ||
| UploadPart(ctx context.Context, data []byte) error | ||
| // CompleteUpload make the upload data to a complete file | ||
| CompleteUpload(ctx context.Context) error | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMO, to name this as |
||
| } | ||
|
|
||
| // ExternalStorage represents a kind of file system storage. | ||
| type ExternalStorage interface { | ||
| // Write file to storage | ||
|
|
@@ -33,7 +41,12 @@ type ExternalStorage interface { | |
| // The argument `path` is the file path that can be used in `Open` | ||
| // function; the argument `size` is the size in byte of the file determined | ||
| // by path. | ||
| WalkDir(ctx context.Context, fn func(path string, size int64) error) error | ||
| WalkDir(ctx context.Context, dir string, listCount int64, fn func(path string, size int64) error) error | ||
|
|
||
| // CreateUploader create a uploader that will upload chunks data to storage. | ||
| // It's design for s3 multi-part upload currently. e.g. cdc log backup use this to do multi part upload | ||
| // to avoid generate small fragment files. | ||
| CreateUploader(ctx context.Context, name string) (Uploader, error) | ||
| } | ||
|
|
||
| // Create creates ExternalStorage. | ||
|
|
@@ -45,7 +58,7 @@ func Create(ctx context.Context, backend *backup.StorageBackend, sendCreds bool) | |
| if backend.S3 == nil { | ||
| return nil, errors.New("s3 config not found") | ||
| } | ||
| return newS3Storage(backend.S3, sendCreds) | ||
| return NewS3Storage(backend.S3, sendCreds) | ||
| case *backup.StorageBackend_Noop: | ||
| return newNoopStorage(), nil | ||
| case *backup.StorageBackend_Gcs: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,7 +42,7 @@ func (tbl *Table) NoChecksum() bool { | |
|
|
||
| // NeedAutoID checks whether the table needs backing up with an autoid. | ||
| func NeedAutoID(tblInfo *model.TableInfo) bool { | ||
| hasRowID := !tblInfo.PKIsHandle && !tblInfo.IsCommonHandle | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. LGTM |
||
| hasRowID := !tblInfo.PKIsHandle | ||
| hasAutoIncID := tblInfo.GetAutoIncrementColInfo() != nil | ||
| return hasRowID || hasAutoIncID | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if the
dirparameter means a specific sub dir to walk, the local implements should only walk the sub dir instead of ther whole base dir?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll fix it in next PR. because this is a cherry-pick PR