Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
c0b600f
update database schema diagram
Sep 19, 2024
4f1f3d7
Merge branch 'main' of github.com:data-preservation-programs/singularity
Feb 27, 2025
d4ab4e9
devcontainers ignore
Mar 25, 2025
932668c
update settings for 2.x linter
Mar 25, 2025
6015fff
fix linting with golangci v2
parkan Apr 30, 2025
f04d3c8
use modern `go install` instead of GO111MODULE
parkan Apr 30, 2025
c38e511
modernize toolchain
parkan Apr 30, 2025
1e8cba5
fix linter errors
parkan Apr 30, 2025
cfbcab9
make linter work on github actions
parkan May 1, 2025
108afab
try alternate migration strategy
parkan May 1, 2025
b4accae
unused import, formatting
parkan May 1, 2025
a0ab4b1
disable linters added since 1.55.2, suppress errcheck
parkan May 1, 2025
f31e212
lint autofixes
parkan May 1, 2025
7f88f1a
int32 for epoch is safe, auto-fixes
parkan May 1, 2025
e0f460d
update linter name for (go)err113
parkan May 1, 2025
bae794f
use core slices instead of exp/slices
parkan May 1, 2025
ce2d30e
disable lint for remaining epoch and cid/block length sites
parkan May 1, 2025
f465731
thelper fix, autofix
parkan May 1, 2025
515c52c
suppress predeclared since that's what the param is called
parkan May 1, 2025
c522d7c
suppress recvcheck for Value() methods
parkan May 1, 2025
9f35caa
better suppression for revcheck for Value
parkan May 1, 2025
5d73bbe
exclude marshalling functions from recvcheck
parkan May 1, 2025
1c4a325
always pass server by pointer, clear last recvcheck
parkan May 1, 2025
25ca84b
add nil check
parkan May 1, 2025
7ed4aa2
suppress file perms errors for docs
parkan May 1, 2025
398367d
warn on and handle unknown/-1 file size in assembler
parkan May 1, 2025
ea47335
suppress remaining overflow warnings :(
parkan May 1, 2025
5472869
tidy
parkan May 1, 2025
66575a8
infer go version from go.mod for sake of sanity
parkan May 1, 2025
cc18ff4
generate
parkan May 1, 2025
7a16bd7
add --min-piece-size parameter to preparation
Apr 1, 2025
2d8c6de
add PieceType field to Car model
Apr 3, 2025
147fe0c
add explicit test for power of two piece size
Apr 3, 2025
21c4808
correct default handler
Apr 3, 2025
1c097be
use min piece size instead of piecesize for dags
Apr 3, 2025
99534b8
update test to reflect smaller piece
Apr 3, 2025
67f44c1
EXPERIMENTAL: allow sending DAG pieces without attachments
parkan Apr 10, 2025
c4a08d6
increase default minimum piece size to 2MiB
parkan Apr 11, 2025
96dd04d
Revert "EXPERIMENTAL: allow sending DAG pieces without attachments"
parkan Apr 11, 2025
2163347
defaultMinPieceSize = 1MiB as per agreement w/curio team
parkan Apr 22, 2025
50bfa05
resolve bizarre git behavior
parkan Apr 22, 2025
ae55669
use MinPieceSize as target value instead of PieceSize
parkan Apr 22, 2025
4a06ae7
Handle legacy prep with no minPieceSize case
parkan Apr 22, 2025
d5cf9ea
specify smaller min-piece-size for test with very small max-size
parkan Apr 22, 2025
93d3f60
skip DB tests correctly
parkan Apr 29, 2025
372fa95
Adjust fixture CIDs to match new behavior (don't overpad)
parkan Apr 29, 2025
bb14305
Add comprehensive tests for piece padding
parkan Apr 30, 2025
aae08a6
tighten up car count assertion
parkan Apr 30, 2025
788e14f
remove dead code
parkan May 1, 2025
8597da1
generate in CI does not work, thanks github
parkan May 1, 2025
18d1198
minor cleanup
parkan May 2, 2025
efdc703
Merge branch 'main' into feat/allow-small-pieces
parkan May 20, 2025
1a099f0
suppress warning about piece sizes as no longer relevant
parkan Jun 4, 2025
a7f478f
add explicit test for piece_type
parkan Jun 5, 2025
b0effcf
propose 0.6.0-RC1
parkan Jun 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM golang:1.23.6-bullseye as builder
FROM golang:1.23.6-bullseye AS builder
WORKDIR /app
COPY go.* ./
RUN go mod download
Expand Down
3 changes: 3 additions & 0 deletions client/swagger/models/dataprep_create_request.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions client/swagger/models/model_car.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions client/swagger/models/model_preparation.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions cmd/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ func TestBasicDataPrep(t *testing.T) {
require.True(t, listPiecesResp.IsSuccess())
require.Len(t, listPiecesResp.Payload, 1)
require.Len(t, listPiecesResp.Payload[0].Pieces, 1)
require.Equal(t, "baga6ea4seaqoahdvfwkrp64ecsxbjvyuqcwpz3o7ctxrjanlv2x4u2cq2qjf2ji", listPiecesResp.Payload[0].Pieces[0].PieceCid)
require.Equal(t, "baga6ea4seaqhmks2wnochilik4updmit54agfi5mjf6r7ehotu36ksdp46uxahi", listPiecesResp.Payload[0].Pieces[0].PieceCid)
// Start daggen
startDagGenResp, err := client.Job.StartDagGen(&job.StartDagGenParams{
ID: "prep",
Expand All @@ -285,7 +285,9 @@ func TestBasicDataPrep(t *testing.T) {
require.True(t, listPiecesResp.IsSuccess())
require.Len(t, listPiecesResp.Payload, 1)
require.Len(t, listPiecesResp.Payload[0].Pieces, 2)
require.Equal(t, "baga6ea4seaqoahdvfwkrp64ecsxbjvyuqcwpz3o7ctxrjanlv2x4u2cq2qjf2ji", listPiecesResp.Payload[0].Pieces[0].PieceCid)
require.Equal(t, "baga6ea4seaqbkouoyih2elxfrztq3gr23rpvgpx5e3fnud2rhvvzf4b7tneeyki", listPiecesResp.Payload[0].Pieces[1].PieceCid)
// data piece, full size
require.Equal(t, "baga6ea4seaqhmks2wnochilik4updmit54agfi5mjf6r7ehotu36ksdp46uxahi", listPiecesResp.Payload[0].Pieces[0].PieceCid)
// dag piece, min piece size
require.Equal(t, "baga6ea4seaqfoo2k3wmwp7gvxnc7hbjpb7ovtvt52tehwfvzxbreljcebbnwgiq", listPiecesResp.Payload[0].Pieces[1].PieceCid)
})
}
10 changes: 9 additions & 1 deletion cmd/dataprep/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ var CreateCmd = &cli.Command{
Value: "",
DefaultText: "Determined by --max-size",
},
&cli.StringFlag{
Name: "min-piece-size",
Usage: "The minimum size of a piece. Pieces smaller than this will be padded up to this size. It's recommended to leave this as the default",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we have to pad at all? And are we sure this is compatible with the limits on padding that the Fil+ program set? Whatever that limit is, I think it would be good to either document it here, or check elsewhere to ensure that min-piece-size isn't set high enough to cause an issue.

Value: "1MiB",
DefaultText: "1MiB",
},
&cli.BoolFlag{
Name: "delete-after-export",
Usage: "Whether to delete the source files after export to CAR files",
Expand Down Expand Up @@ -83,6 +89,7 @@ var CreateCmd = &cli.Command{
outputStorages := c.StringSlice("output")
maxSizeStr := c.String("max-size")
pieceSizeStr := c.String("piece-size")
minPieceSizeStr := c.String("min-piece-size")
for _, sourcePath := range c.StringSlice("local-source") {
source, err := createStorageIfNotExist(c.Context, db, sourcePath)
if err != nil {
Expand All @@ -103,8 +110,9 @@ var CreateCmd = &cli.Command{
OutputStorages: outputStorages,
MaxSizeStr: maxSizeStr,
PieceSizeStr: pieceSizeStr,
DeleteAfterExport: c.Bool("delete-after-export"),
MinPieceSizeStr: minPieceSizeStr,
Name: name,
DeleteAfterExport: c.Bool("delete-after-export"),
NoInline: c.Bool("no-inline"),
NoDag: c.Bool("no-dag"),
})
Expand Down
6 changes: 5 additions & 1 deletion cmd/functional_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,10 @@ func TestDataPrep(t *testing.T) {
require.Equal(t, pieceCID, calculatedPieceCID)
err = os.WriteFile(filepath.Join(downloadDir, pieceCID+".car"), downloaded, 0777)
require.NoError(t, err)

// Verify piece size is a power of two
pieceSize := uint64(len(downloaded))
require.True(t, util.IsPowerOfTwo(pieceSize), "piece size %d is not a power of two", pieceSize)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this just testing the default value?

}

// Download all pieces using local download server
Expand Down Expand Up @@ -499,7 +503,7 @@ func TestNoDuplicatedOutput(t *testing.T) {
_, _, err = runner.Run(ctx, fmt.Sprintf("singularity storage create local --name source --path %s", testutil.EscapePath(source)))
require.NoError(t, err)

_, _, err = runner.Run(ctx, fmt.Sprintf("singularity prep create --name test-prep --delete-after-export --source source --local-output %s --max-size=500KiB", testutil.EscapePath(output)))
_, _, err = runner.Run(ctx, fmt.Sprintf("singularity prep create --name test-prep --delete-after-export --source source --local-output %s --max-size=500KiB --min-piece-size=256KiB", testutil.EscapePath(output)))
require.NoError(t, err)

// Start scanning
Expand Down
1 change: 1 addition & 0 deletions docs/en/cli-reference/prep/create.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions docs/swagger/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions docs/swagger/swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions docs/swagger/swagger.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions handler/dataprep/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ type CreateRequest struct {
OutputStorages []string `json:"outputStorages"` // Name of Output storage systems to be used for the output
MaxSizeStr string `default:"31.5GiB" json:"maxSize"` // Maximum size of the CAR files to be created
PieceSizeStr string `default:"" json:"pieceSize"` // Target piece size of the CAR files used for piece commitment calculation
MinPieceSizeStr string `default:"1MiB" json:"minPieceSize"` // Minimum piece size for the preparation, applies only to DAG and remainder pieces
DeleteAfterExport bool `default:"false" json:"deleteAfterExport"` // Whether to delete the source files after export
NoInline bool `default:"false" json:"noInline"` // Whether to disable inline storage for the preparation. Can save database space but requires at least one output storage.
NoDag bool `default:"false" json:"noDag"` // Whether to disable maintaining folder dag structure for the sources. If disabled, DagGen will not be possible and folders will not have an associated CID.
Expand Down Expand Up @@ -77,6 +78,24 @@ func ValidateCreateRequest(ctx context.Context, db *gorm.DB, request CreateReque
return nil, errors.Wrap(handlererror.ErrInvalidParameter, "maxSize needs to be reduced to leave space for padding")
}

minPieceSizeStr := request.MinPieceSizeStr
if minPieceSizeStr == "" {
minPieceSizeStr = "1MiB"
}

minPieceSize, err := humanize.ParseBytes(minPieceSizeStr)
if err != nil {
return nil, errors.Join(handlererror.ErrInvalidParameter, errors.Wrapf(err, "invalid value for minPieceSize: %s", minPieceSizeStr))
}

if minPieceSize > pieceSize {
return nil, errors.Wrap(handlererror.ErrInvalidParameter, "minPieceSize cannot be larger than pieceSize")
}

if minPieceSize != util.NextPowerOfTwo(minPieceSize) {
return nil, errors.Wrap(handlererror.ErrInvalidParameter, "minPieceSize must be a power of two")
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to add a check here to ensure the min-piece-size never over-pads in a way that makes things non-compliant.


var sources []model.Storage
for _, name := range request.SourceStorages {
var source model.Storage
Expand Down Expand Up @@ -114,6 +133,7 @@ func ValidateCreateRequest(ctx context.Context, db *gorm.DB, request CreateReque
return &model.Preparation{
MaxSize: int64(maxSize),
PieceSize: int64(pieceSize),
MinPieceSize: int64(minPieceSize),
SourceStorages: sources,
OutputStorages: outputs,
DeleteAfterExport: request.DeleteAfterExport,
Expand Down
1 change: 1 addition & 0 deletions handler/dataprep/piece.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ func (DefaultHandler) AddPieceHandler(
StoragePath: request.FilePath,
PreparationID: preparation.ID,
FileSize: fileSize,
PieceType: model.DataPiece,
}

err = database.DoRetry(ctx, func() error { return db.Create(&mCar).Error })
Expand Down
2 changes: 1 addition & 1 deletion handler/job/pack_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ func TestPackHandler_Success(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, car)
require.EqualValues(t, 100, car.FileSize)
require.EqualValues(t, "baga6ea4seaqbuglmtahbspkbeunqohciieh4yjivfhcqawufwgs4gt7mzmyfmmi", car.PieceCID.String())
require.EqualValues(t, "baga6ea4seaqpikooah5wmbpjmnvx3ysyf36xagymjtbccnf5twt2cpaqcgcwqha", car.PieceCID.String())
err = db.Find(&job, 1).Error
require.NoError(t, err)
require.Equal(t, model.Complete, job.State)
Expand Down
18 changes: 18 additions & 0 deletions model/preparation.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@ import (
"gorm.io/gorm"
)

// PieceType labels a Car as either a data piece or a DAG piece.
type PieceType string

const (
DataPiece PieceType = "data"
DagPiece PieceType = "dag"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you document the difference between these two? Data and DAG are both very overloaded terms in this space.

)

type Worker struct {
ID string `gorm:"primaryKey" json:"id"`
LastHeartbeat time.Time `json:"lastHeartbeat"`
Expand All @@ -34,6 +41,7 @@ type Preparation struct {
DeleteAfterExport bool `json:"deleteAfterExport"` // DeleteAfterExport is a flag that indicates whether the source files should be deleted after export.
MaxSize int64 `json:"maxSize"`
PieceSize int64 `json:"pieceSize"`
MinPieceSize int64 `json:"minPieceSize"` // Minimum piece size for the preparation, applies only to DAG and remainder pieces
NoInline bool `json:"noInline"`
NoDag bool `json:"noDag"`

Expand Down Expand Up @@ -252,6 +260,7 @@ type CarID uint32
type Car struct {
ID CarID `cbor:"-" gorm:"primaryKey" json:"id" table:"verbose"`
CreatedAt time.Time `cbor:"-" json:"createdAt" table:"verbose;format:2006-01-02 15:04:05"`
PieceType PieceType `cbor:"0,keyasint,omitempty" json:"pieceType" swaggertype:"string"` // PieceType indicates whether this is a data piece or DAG piece
PieceCID CID `cbor:"1,keyasint,omitempty" gorm:"column:piece_cid;index;type:bytes;size:255" json:"pieceCid" swaggertype:"string"`
PieceSize int64 `cbor:"2,keyasint,omitempty" json:"pieceSize"`
RootCID CID `cbor:"3,keyasint,omitempty" gorm:"column:root_cid;type:bytes" json:"rootCid" swaggertype:"string"`
Expand Down Expand Up @@ -319,3 +328,12 @@ func (c CarBlock) BlockLength() int32 {

return c.blockLength
}

// GetMinPieceSize reports the minimum piece size configured on the preparation.
// A zero MinPieceSize is treated as "not set" and yields the 1MiB fallback, so
// preparations stored before the field existed keep working.
func (p *Preparation) GetMinPieceSize() int64 {
	const fallbackMinPieceSize int64 = 1 << 20 // 1MiB

	if configured := p.MinPieceSize; configured != 0 {
		return configured
	}
	return fallbackMinPieceSize
}
Loading
Loading