-
Notifications
You must be signed in to change notification settings - Fork 21
Feat/allow small pieces #479
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c0b600f
4f1f3d7
d4ab4e9
932668c
6015fff
f04d3c8
c38e511
1e8cba5
cfbcab9
108afab
b4accae
a0ab4b1
f31e212
7f88f1a
e0f460d
bae794f
ce2d30e
f465731
515c52c
c522d7c
9f35caa
5d73bbe
1c4a325
25ca84b
7ed4aa2
398367d
ea47335
5472869
66575a8
cc18ff4
7a16bd7
2d8c6de
147fe0c
21c4808
1c097be
99534b8
67f44c1
c4a08d6
96dd04d
2163347
50bfa05
ae55669
4a06ae7
d5cf9ea
93d3f60
372fa95
bb14305
aae08a6
788e14f
8597da1
18d1198
efdc703
1a099f0
a7f478f
b0effcf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -451,6 +451,10 @@ func TestDataPrep(t *testing.T) { | |
| require.Equal(t, pieceCID, calculatedPieceCID) | ||
| err = os.WriteFile(filepath.Join(downloadDir, pieceCID+".car"), downloaded, 0777) | ||
| require.NoError(t, err) | ||
|
|
||
| // Verify piece size is a power of two | ||
| pieceSize := uint64(len(downloaded)) | ||
| require.True(t, util.IsPowerOfTwo(pieceSize), "piece size %d is not a power of two", pieceSize) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this just testing the default value? |
||
| } | ||
|
|
||
| // Download all pieces using local download server | ||
|
|
@@ -499,7 +503,7 @@ func TestNoDuplicatedOutput(t *testing.T) { | |
| _, _, err = runner.Run(ctx, fmt.Sprintf("singularity storage create local --name source --path %s", testutil.EscapePath(source))) | ||
| require.NoError(t, err) | ||
|
|
||
| _, _, err = runner.Run(ctx, fmt.Sprintf("singularity prep create --name test-prep --delete-after-export --source source --local-output %s --max-size=500KiB", testutil.EscapePath(output))) | ||
| _, _, err = runner.Run(ctx, fmt.Sprintf("singularity prep create --name test-prep --delete-after-export --source source --local-output %s --max-size=500KiB --min-piece-size=256KiB", testutil.EscapePath(output))) | ||
| require.NoError(t, err) | ||
|
|
||
| // Start scanning | ||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ type CreateRequest struct { | |
| OutputStorages []string `json:"outputStorages"` // Name of Output storage systems to be used for the output | ||
| MaxSizeStr string `default:"31.5GiB" json:"maxSize"` // Maximum size of the CAR files to be created | ||
| PieceSizeStr string `default:"" json:"pieceSize"` // Target piece size of the CAR files used for piece commitment calculation | ||
| MinPieceSizeStr string `default:"1MiB" json:"minPieceSize"` // Minimum piece size for the preparation, applies only to DAG and remainer pieces | ||
| DeleteAfterExport bool `default:"false" json:"deleteAfterExport"` // Whether to delete the source files after export | ||
| NoInline bool `default:"false" json:"noInline"` // Whether to disable inline storage for the preparation. Can save database space but requires at least one output storage. | ||
| NoDag bool `default:"false" json:"noDag"` // Whether to disable maintaining folder dag structure for the sources. If disabled, DagGen will not be possible and folders will not have an associated CID. | ||
|
|
@@ -77,6 +78,24 @@ func ValidateCreateRequest(ctx context.Context, db *gorm.DB, request CreateReque | |
| return nil, errors.Wrap(handlererror.ErrInvalidParameter, "maxSize needs to be reduced to leave space for padding") | ||
| } | ||
|
|
||
| minPieceSizeStr := request.MinPieceSizeStr | ||
| if minPieceSizeStr == "" { | ||
| minPieceSizeStr = "1MiB" | ||
| } | ||
|
|
||
| minPieceSize, err := humanize.ParseBytes(minPieceSizeStr) | ||
| if err != nil { | ||
| return nil, errors.Join(handlererror.ErrInvalidParameter, errors.Wrapf(err, "invalid value for minPieceSize: %s", minPieceSizeStr)) | ||
| } | ||
|
|
||
| if minPieceSize > pieceSize { | ||
| return nil, errors.Wrap(handlererror.ErrInvalidParameter, "minPieceSize cannot be larger than pieceSize") | ||
| } | ||
|
|
||
| if minPieceSize != util.NextPowerOfTwo(minPieceSize) { | ||
| return nil, errors.Wrap(handlererror.ErrInvalidParameter, "minPieceSize must be a power of two") | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be nice to add a check here to ensure the min-piece-size never over-pads in a way that makes things non-compliant. |
||
|
|
||
| var sources []model.Storage | ||
| for _, name := range request.SourceStorages { | ||
| var source model.Storage | ||
|
|
@@ -114,6 +133,7 @@ func ValidateCreateRequest(ctx context.Context, db *gorm.DB, request CreateReque | |
| return &model.Preparation{ | ||
| MaxSize: int64(maxSize), | ||
| PieceSize: int64(pieceSize), | ||
| MinPieceSize: int64(minPieceSize), | ||
| SourceStorages: sources, | ||
| OutputStorages: outputs, | ||
| DeleteAfterExport: request.DeleteAfterExport, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,13 @@ import ( | |
| "gorm.io/gorm" | ||
| ) | ||
|
|
||
| type PieceType string | ||
|
|
||
| const ( | ||
| DataPiece PieceType = "data" | ||
| DagPiece PieceType = "dag" | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you document the difference between these two? Data and DAG are both very overloaded terms in this space. |
||
| ) | ||
|
|
||
| type Worker struct { | ||
| ID string `gorm:"primaryKey" json:"id"` | ||
| LastHeartbeat time.Time `json:"lastHeartbeat"` | ||
|
|
@@ -34,6 +41,7 @@ type Preparation struct { | |
| DeleteAfterExport bool `json:"deleteAfterExport"` // DeleteAfterExport is a flag that indicates whether the source files should be deleted after export. | ||
| MaxSize int64 `json:"maxSize"` | ||
| PieceSize int64 `json:"pieceSize"` | ||
| MinPieceSize int64 `json:"minPieceSize"` // Minimum piece size for the preparation, applies only to DAG and remainder pieces | ||
| NoInline bool `json:"noInline"` | ||
| NoDag bool `json:"noDag"` | ||
|
|
||
|
|
@@ -252,6 +260,7 @@ type CarID uint32 | |
| type Car struct { | ||
| ID CarID `cbor:"-" gorm:"primaryKey" json:"id" table:"verbose"` | ||
| CreatedAt time.Time `cbor:"-" json:"createdAt" table:"verbose;format:2006-01-02 15:04:05"` | ||
| PieceType PieceType `cbor:"0,keyasint,omitempty" json:"pieceType" swaggertype:"string"` // PieceType indicates whether this is a data piece or DAG piece | ||
| PieceCID CID `cbor:"1,keyasint,omitempty" gorm:"column:piece_cid;index;type:bytes;size:255" json:"pieceCid" swaggertype:"string"` | ||
| PieceSize int64 `cbor:"2,keyasint,omitempty" json:"pieceSize"` | ||
| RootCID CID `cbor:"3,keyasint,omitempty" gorm:"column:root_cid;type:bytes" json:"rootCid" swaggertype:"string"` | ||
|
|
@@ -319,3 +328,12 @@ func (c CarBlock) BlockLength() int32 { | |
|
|
||
| return c.blockLength | ||
| } | ||
|
|
||
| // GetMinPieceSize returns the minimum piece size for the preparation, with a fallback to 1MiB if not set. | ||
| // This ensures backward compatibility with older preparations that don't have minPieceSize set. | ||
| func (p *Preparation) GetMinPieceSize() int64 { | ||
| if p.MinPieceSize == 0 { | ||
| return 1 << 20 // 1MiB | ||
| } | ||
| return p.MinPieceSize | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we have to pad at all? And are we sure this is compatible with the limits on padding that the Fil+ program set? Whatever that limit is, I think it would be good to either document it here, or check elsewhere to ensure that
min-piece-sizeisn't set high enough to cause an issue.