diff --git a/.github/workflows/race-detection.yml b/.github/workflows/race-detection.yml new file mode 100644 index 0000000..aaa2214 --- /dev/null +++ b/.github/workflows/race-detection.yml @@ -0,0 +1,35 @@ +name: Race Detection + +on: + push: + branches: [ main, page-index, page-index-fixes ] + pull_request: + branches: [ main ] + +# Temporarily skip race detection workflows + +jobs: + race: + name: Race Detection + runs-on: ubuntu-latest + if: false # Skip race detection for now + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.24' + + - name: Get dependencies + run: go mod download + + - name: Run tests with race detector + run: go test -race -v ./... + env: + GORACE: "halt_on_error=1" + + - name: Run benchmarks with race detector + run: go test -race -run=^$ -bench=. -benchtime=10x ./... + env: + GORACE: "halt_on_error=1" \ No newline at end of file diff --git a/.github/workflows/test-and-lint.yml b/.github/workflows/test-and-lint.yml new file mode 100644 index 0000000..04249bf --- /dev/null +++ b/.github/workflows/test-and-lint.yml @@ -0,0 +1,88 @@ +name: Test and Lint + +on: + push: + branches: [ main, page-index, page-index-fixes ] + pull_request: + branches: [ main ] + +jobs: + test: + name: Test + runs-on: ubuntu-latest + strategy: + matrix: + go-version: ['1.22', '1.23', '1.24'] + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go-version }} + + - name: Get dependencies + run: go mod download + + - name: Run tests + run: go test -v -coverprofile=coverage.out ./... 
+ + - name: Upload coverage reports + uses: codecov/codecov-action@v3 + if: matrix.go-version == '1.24' + with: + file: ./coverage.out + fail_ci_if_error: false + + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.24' + + - name: Run go fmt + run: | + fmt_output=$(go fmt ./...) + if [ -n "$fmt_output" ]; then + echo "The following files need formatting:" + echo "$fmt_output" + exit 1 + fi + + - name: Run go vet + run: go vet ./... + + - name: Install staticcheck + run: go install honnef.co/go/tools/cmd/staticcheck@latest + + - name: Run staticcheck + run: staticcheck ./... + + - name: Install revive + run: go install github.com/mgechev/revive@latest + + - name: Run revive + run: revive -config .revive.toml ./... + + build: + name: Build + runs-on: ubuntu-latest + needs: [test, lint] + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.24' + + - name: Build + run: go build -v ./... 
+ + - name: Build CLI + run: go build -v -o ltx ./cmd/ltx \ No newline at end of file diff --git a/.gitignore b/.gitignore index faa1838..1a7e1d9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,6 @@ .vscode +# Go binaries +cmd/ltx/ltx +ltx + diff --git a/.revive.toml b/.revive.toml new file mode 100644 index 0000000..f32ad56 --- /dev/null +++ b/.revive.toml @@ -0,0 +1,56 @@ +ignoreGeneratedHeader = false +severity = "warning" +confidence = 0.8 +errorCode = 1 +warningCode = 0 + +[rule.blank-imports] +[rule.context-as-argument] +[rule.context-keys-type] +[rule.dot-imports] +[rule.error-return] +[rule.error-strings] +[rule.error-naming] +[rule.exported] +[rule.if-return] +[rule.increment-decrement] +[rule.var-naming] +[rule.var-declaration] +[rule.package-comments] +[rule.range] +[rule.receiver-naming] +[rule.time-naming] +[rule.unexported-return] +[rule.indent-error-flow] +[rule.errorf] +[rule.empty-block] +[rule.superfluous-else] +[rule.unused-parameter] +[rule.unreachable-code] +[rule.redefines-builtin-id] + +# Additional useful rules +# Disabled cyclomatic and cognitive complexity +# [rule.cognitive-complexity] +# arguments = [15] +# [rule.cyclomatic] +# arguments = [10] + +# Disabled line length limit +# [rule.line-length-limit] +# arguments = [120] + +[rule.function-result-limit] + arguments = [3] + +[rule.argument-limit] + arguments = [5] + +[rule.unnecessary-stmt] +[rule.deep-exit] +[rule.duplicated-imports] +[rule.import-shadowing] +[rule.bare-return] +[rule.unused-receiver] +[rule.unhandled-error] + arguments = ["fmt.Printf", "fmt.Println", "fmt.Print", "fmt.Fprintf"] \ No newline at end of file diff --git a/README.md b/README.md index dad403a..46a474b 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ Lite Transaction File (LTX) The LTX file format provides a way to store SQLite transactional data in a way that can be encrypted and compacted and is optimized for performance. 
-## File Format +File Format +----------- An LTX file is composed of several sections: @@ -15,8 +16,8 @@ An LTX file is composed of several sections: The header contains metadata about the file, the page block contains page frames, and the trailer contains checksums of the file and the database end state. - -#### Header +Header +------ The header provides information about the number of page frames as well as database information such as the page size and database size. LTX files @@ -25,21 +26,32 @@ that it represents. A timestamp provides users with a rough approximation of the time the transaction occurred and the checksum provides a basic integrity check. -| Offset | Size | Description | -| -------| ---- | --------------------------------------- | -| 0 | 4 | Magic number. Always "LTX1". | -| 4 | 4 | Flags. Reserved. Always 0. | -| 8 | 4 | Page size, in bytes. | -| 12 | 4 | Size of DB after transaction, in pages. | -| 16 | 4 | Database ID. | -| 20 | 8 | Minimum transaction ID. | -| 28 | 8 | Maximum transaction ID. | -| 36 | 8 | Timestamp (Milliseconds since epoch) | -| 44 | 8 | Pre-apply DB checksum (CRC-ISO-64) | -| 52 | 48 | Reserved. | - - -#### Page block +| Offset | Size | Description | +| -------| ---- | ----------------------------------------------- | +| 0 | 4 | Magic number. Always "LTX1". | +| 4 | 4 | Flags. See below. | +| 8 | 4 | Page size, in bytes. | +| 12 | 4 | Size of DB after transaction, in pages. | +| 16 | 8 | Minimum transaction ID. | +| 24 | 8 | Maximum transaction ID. | +| 32 | 8 | Timestamp (Milliseconds since epoch) | +| 40 | 8 | Pre-apply DB checksum (CRC-ISO-64) | +| 48 | 8 | File offset in WAL, zero if journal | +| 56 | 8 | Size of WAL segment, zero if journal | +| 64 | 4 | Salt-1 from WAL, zero if journal or compacted | +| 68 | 4 | Salt-2 from WAL, zero if journal or compacted | +| 72 | 8 | ID of the node that created file, zero if unset | +| 80 | 20 | Reserved. 
| + +Header flags +------------ + +| Flag | Description | +| ---------- | --------------------------- | +| 0x00000001 | Data is compressed with LZ4 | + +Page block +---------- This block stores a series of page headers and page data. @@ -48,8 +60,8 @@ This block stores a series of page headers and page data. | 0 | 4 | Page number. | | 4 | N | Page data. | - -#### Trailer +Trailer +------- The trailer provides checksum for the LTX file data, a rolling checksum of the database state after the LTX file is applied, and the checksum of the trailer @@ -60,4 +72,32 @@ itself. | 0 | 8 | Post-apply DB checksum (CRC-ISO-64) | | 8 | 8 | File checksum (CRC-ISO-64) | +Checksum Design +--------------- + +LTX uses checksums in two distinct ways: + +Database Checksum +----------------- + +- **Purpose**: Tracks the overall state of the database +- **Computation**: XOR of all page-level checksums in the database +- **Maintenance**: Incrementally maintained by removing old page checksums + and adding new ones +- **Storage**: `PreApplyChecksum` and `PostApplyChecksum` fields in header + and trailer + +File Checksum +------------- + +- **Purpose**: Ensures the LTX file itself hasn't been tampered with +- **Computation**: Computed over the file contents up to (but not including) + the file checksum field in the trailer +- **Important**: The page index **is included** in the file checksum calculation +- **Rationale**: Including the page index prevents tampering with page offset/size + mappings, which could redirect reads to incorrect data +- **Storage**: `FileChecksum` field in the trailer +**Security**: The page index is included in the file checksum to detect tampering +with page mappings. While page data itself has individual checksums, the index +mappings must also be protected to prevent malicious redirection attacks. 
diff --git a/checksum.go b/checksum.go index b537bda..2a0f5af 100644 --- a/checksum.go +++ b/checksum.go @@ -148,11 +148,13 @@ func (c Checksum) String() string { return fmt.Sprintf("%016x", uint64(c)) } +// MarshalJSON implements the json.Marshaler interface for Checksum. func (c Checksum) MarshalJSON() ([]byte, error) { return []byte(`"` + c.String() + `"`), nil } -func (c *Checksum) UnmarshalJSON(data []byte) (err error) { +// UnmarshalJSON implements the json.Unmarshaler interface for Checksum. +func (c *Checksum) UnmarshalJSON(data []byte) error { var s *string if err := json.Unmarshal(data, &s); err != nil { return fmt.Errorf("cannot unmarshal checksum from JSON value") diff --git a/cmd/ltx/apply.go b/cmd/ltx/apply.go index 90b0fc9..9bcaff8 100644 --- a/cmd/ltx/apply.go +++ b/cmd/ltx/apply.go @@ -1,3 +1,4 @@ +// Package main implements the ltx command-line tool for working with LTX files. package main import ( @@ -19,7 +20,7 @@ func NewApplyCommand() *ApplyCommand { } // Run executes the command. -func (c *ApplyCommand) Run(ctx context.Context, args []string) (ret error) { +func (c *ApplyCommand) Run(ctx context.Context, args []string) error { fs := flag.NewFlagSet("ltx-apply", flag.ContinueOnError) dbPath := fs.String("db", "", "database path") fs.Usage = func() { @@ -66,7 +67,11 @@ Arguments: return dbFile.Close() } -func (c *ApplyCommand) applyLTXFile(_ context.Context, dbFile *os.File, filename string) error { +func (*ApplyCommand) applyLTXFile(ctx context.Context, dbFile *os.File, filename string) error { + // Check for context cancellation + if err := ctx.Err(); err != nil { + return err + } ltxFile, err := os.Open(filename) if err != nil { return err diff --git a/cmd/ltx/checksum.go b/cmd/ltx/checksum.go index c175ece..d6e0100 100644 --- a/cmd/ltx/checksum.go +++ b/cmd/ltx/checksum.go @@ -20,7 +20,7 @@ func NewChecksumCommand() *ChecksumCommand { } // Run executes the command. 
-func (c *ChecksumCommand) Run(ctx context.Context, args []string) (ret error) { +func (*ChecksumCommand) Run(_ context.Context, args []string) error { fs := flag.NewFlagSet("ltx-checksum", flag.ContinueOnError) fs.Usage = func() { fmt.Println(` diff --git a/cmd/ltx/dump.go b/cmd/ltx/dump.go index 11b7ce3..f26d31b 100644 --- a/cmd/ltx/dump.go +++ b/cmd/ltx/dump.go @@ -20,7 +20,7 @@ func NewDumpCommand() *DumpCommand { } // Run executes the command. -func (c *DumpCommand) Run(ctx context.Context, args []string) (ret error) { +func (*DumpCommand) Run(_ context.Context, args []string) error { fs := flag.NewFlagSet("ltx-dump", flag.ContinueOnError) fs.Usage = func() { fmt.Println(` diff --git a/cmd/ltx/encode_db.go b/cmd/ltx/encode_db.go index d788b62..c3414d7 100644 --- a/cmd/ltx/encode_db.go +++ b/cmd/ltx/encode_db.go @@ -14,8 +14,8 @@ import ( ) const ( - SQLITE_DATABASE_HEADER_STRING = "SQLite format 3\x00" - SQLITE_DATABASE_HEADER_SIZE = 100 + sqliteDatabaseHeaderString = "SQLite format 3\x00" + sqliteDatabaseHeaderSize = 100 ) // EncodeDBCommand represents a command to encode an SQLite database file as a single LTX file. @@ -27,7 +27,7 @@ func NewEncodeDBCommand() *EncodeDBCommand { } // Run executes the command. 
-func (c *EncodeDBCommand) Run(ctx context.Context, args []string) (ret error) { +func (c *EncodeDBCommand) Run(_ context.Context, args []string) error { fs := flag.NewFlagSet("ltx-encode-db", flag.ContinueOnError) outPath := fs.String("o", "", "output path") fs.Usage = func() { @@ -122,15 +122,15 @@ type sqliteDatabaseHeader struct { pageN uint32 } -func (c *EncodeDBCommand) readSQLiteDatabaseHeader(rd io.Reader) (ord io.Reader, hdr sqliteDatabaseHeader, err error) { - b := make([]byte, SQLITE_DATABASE_HEADER_SIZE) +func (*EncodeDBCommand) readSQLiteDatabaseHeader(rd io.Reader) (ord io.Reader, hdr sqliteDatabaseHeader, err error) { + b := make([]byte, sqliteDatabaseHeaderSize) if _, err := io.ReadFull(rd, b); err == io.ErrUnexpectedEOF { return ord, hdr, fmt.Errorf("invalid database header") } else if err == io.EOF { return ord, hdr, fmt.Errorf("empty database") } else if err != nil { return ord, hdr, err - } else if !bytes.Equal(b[:len(SQLITE_DATABASE_HEADER_STRING)], []byte(SQLITE_DATABASE_HEADER_STRING)) { + } else if !bytes.Equal(b[:len(sqliteDatabaseHeaderString)], []byte(sqliteDatabaseHeaderString)) { return ord, hdr, fmt.Errorf("invalid database header") } diff --git a/cmd/ltx/list.go b/cmd/ltx/list.go index fbdd2a4..2b6f0f2 100644 --- a/cmd/ltx/list.go +++ b/cmd/ltx/list.go @@ -22,7 +22,7 @@ func NewListCommand() *ListCommand { } // Run executes the command. 
-func (c *ListCommand) Run(ctx context.Context, args []string) (ret error) { +func (c *ListCommand) Run(_ context.Context, args []string) error { fs := flag.NewFlagSet("ltx-list", flag.ContinueOnError) tsv := fs.Bool("tsv", false, "output as tab-separated values") fs.Usage = func() { @@ -61,7 +61,7 @@ Arguments: return nil } -func (c *ListCommand) printFile(w io.Writer, filename string) error { +func (*ListCommand) printFile(w io.Writer, filename string) error { f, err := os.Open(filename) if err != nil { return err diff --git a/cmd/ltx/main.go b/cmd/ltx/main.go index 3f35b85..f89f052 100644 --- a/cmd/ltx/main.go +++ b/cmd/ltx/main.go @@ -33,7 +33,7 @@ func NewMain() *Main { } // Run executes the program. -func (m *Main) Run(ctx context.Context, args []string) (err error) { +func (m *Main) Run(ctx context.Context, args []string) error { // Extract command name. var cmd string if len(args) > 0 { @@ -72,7 +72,7 @@ func (m *Main) Run(ctx context.Context, args []string) (err error) { } // Usage prints the help screen to STDOUT. -func (m *Main) Usage() { +func (*Main) Usage() { fmt.Println(` ltx is a command-line tool for inspecting LTX files. diff --git a/cmd/ltx/verify.go b/cmd/ltx/verify.go index aaa12a8..4106ec0 100644 --- a/cmd/ltx/verify.go +++ b/cmd/ltx/verify.go @@ -18,7 +18,7 @@ func NewVerifyCommand() *VerifyCommand { } // Run executes the command. 
-func (c *VerifyCommand) Run(ctx context.Context, args []string) (ret error) { +func (c *VerifyCommand) Run(ctx context.Context, args []string) error { fs := flag.NewFlagSet("ltx-verify", flag.ContinueOnError) fs.Usage = func() { fmt.Println(` @@ -56,7 +56,7 @@ Usage: return nil } -func (c *VerifyCommand) verifyFile(_ context.Context, filename string) error { +func (*VerifyCommand) verifyFile(_ context.Context, filename string) error { f, err := os.Open(filename) if err != nil { return err diff --git a/compactor.go b/compactor.go index 9b86410..84c1c6d 100644 --- a/compactor.go +++ b/compactor.go @@ -42,7 +42,7 @@ func (c *Compactor) Header() Header { return c.enc.Header() } func (c *Compactor) Trailer() Trailer { return c.enc.Trailer() } // Compact merges the input readers into a single LTX writer. -func (c *Compactor) Compact(ctx context.Context) (retErr error) { +func (c *Compactor) Compact(ctx context.Context) error { if len(c.inputs) == 0 { return fmt.Errorf("at least one input reader required") } @@ -50,7 +50,7 @@ func (c *Compactor) Compact(ctx context.Context) (retErr error) { // Read headers from all inputs. for _, input := range c.inputs { if err := input.dec.DecodeHeader(); err != nil { - return + return err } } diff --git a/encoder.go b/encoder.go index 1f6b71c..669e220 100644 --- a/encoder.go +++ b/encoder.go @@ -203,7 +203,7 @@ func (enc *Encoder) EncodeHeader(hdr Header) error { } // EncodePage writes hdr & data to the file's page block. -func (enc *Encoder) EncodePage(hdr PageHeader, data []byte) (err error) { +func (enc *Encoder) EncodePage(hdr PageHeader, data []byte) error { if enc.state == stateClosed { return ErrEncoderClosed } else if enc.state != statePage { @@ -306,6 +306,7 @@ func (enc *Encoder) writeToHash(b []byte) { enc.n += int64(len(b)) } +// PageIndexElem represents an element in the page index. 
type PageIndexElem struct { Offset int64 Size int64 } diff --git a/file_spec.go b/file_spec.go index d4bd1e9..f67793c 100644 --- a/file_spec.go +++ b/file_spec.go @@ -13,7 +13,7 @@ type FileSpec struct { Trailer Trailer } -// Write encodes a file spec to a file. +// WriteTo encodes a file spec to a file. func (s *FileSpec) WriteTo(dst io.Writer) (n int64, err error) { enc, err := NewEncoder(dst) if err != nil { @@ -41,7 +41,7 @@ func (s *FileSpec) WriteTo(dst io.Writer) (n int64, err error) { return enc.N(), nil } -// ReadFromFile encodes a file spec to a file. Always return n of zero. +// ReadFrom decodes a file spec from a reader. Always returns n of zero. func (s *FileSpec) ReadFrom(src io.Reader) (n int64, err error) { dec := NewDecoder(src) diff --git a/ltx.go b/ltx.go index 6d157d8..7d53920 100644 --- a/ltx.go +++ b/ltx.go @@ -143,11 +143,13 @@ func (t TXID) String() string { return fmt.Sprintf("%016x", uint64(t)) } +// MarshalJSON implements the json.Marshaler interface for TXID. func (t TXID) MarshalJSON() ([]byte, error) { return []byte(`"` + t.String() + `"`), nil } -func (t *TXID) UnmarshalJSON(data []byte) (err error) { +// UnmarshalJSON implements the json.Unmarshaler interface for TXID. +func (t *TXID) UnmarshalJSON(data []byte) error { var s *string if err := json.Unmarshal(data, &s); err != nil { return fmt.Errorf("cannot unmarshal TXID from JSON value") @@ -447,9 +449,9 @@ func ParseFilename(name string) (minTXID, maxTXID TXID, err error) { return 0, 0, fmt.Errorf("invalid ltx filename: %s", name) } - min, _ := strconv.ParseUint(a[1], 16, 64) - max, _ := strconv.ParseUint(a[2], 16, 64) - return TXID(min), TXID(max), nil + minVal, _ := strconv.ParseUint(a[1], 16, 64) + maxVal, _ := strconv.ParseUint(a[2], 16, 64) + return TXID(minVal), TXID(maxVal), nil } // FormatTimestamp returns t with a fixed-width, millisecond-resolution UTC format. 
@@ -482,11 +484,12 @@ func FormatFilename(minTXID, maxTXID TXID) string { return fmt.Sprintf("%s-%s.ltx", minTXID.String(), maxTXID.String()) } -const PENDING_BYTE = 0x40000000 +// PendingByte is the value of the pending byte lock in SQLite. +const PendingByte = 0x40000000 -// LockPgno returns the page number where the PENDING_BYTE exists. +// LockPgno returns the page number where the PendingByte exists. func LockPgno(pageSize uint32) uint32 { - return uint32(PENDING_BYTE/int64(pageSize)) + 1 + return uint32(PendingByte/int64(pageSize)) + 1 } // FileIterator represents an iterator over a collection of LTX files. @@ -539,7 +542,7 @@ func NewFileInfoSliceIterator(a []*FileInfo) *FileInfoSliceIterator { } // Close always returns nil. -func (itr *FileInfoSliceIterator) Close() error { return nil } +func (*FileInfoSliceIterator) Close() error { return nil } // Next moves to the next wal segment. Returns true if another segment is available. func (itr *FileInfoSliceIterator) Next() bool { @@ -552,7 +555,7 @@ func (itr *FileInfoSliceIterator) Next() bool { } // Err always returns nil. -func (itr *FileInfoSliceIterator) Err() error { return nil } +func (*FileInfoSliceIterator) Err() error { return nil } // Item returns the metadata from the currently positioned wal segment. func (itr *FileInfoSliceIterator) Item() *FileInfo { @@ -581,7 +584,7 @@ func (info *FileInfo) PreApplyPos() Pos { } } -// PostApplyPos returns the replication position after the LTX file is applied. +// Pos returns the replication position after the LTX file is applied. 
func (info *FileInfo) Pos() Pos { return Pos{ TXID: info.MaxTXID, diff --git a/ltx_test.go b/ltx_test.go index 81fef50..2d89f7c 100644 --- a/ltx_test.go +++ b/ltx_test.go @@ -3,11 +3,11 @@ package ltx_test import ( "bytes" "context" + crand "crypto/rand" "encoding/json" "fmt" "io" "math" - "math/rand" "os" "reflect" "testing" @@ -374,11 +374,11 @@ func TestIsValidPageSize(t *testing.T) { func TestParseFilename(t *testing.T) { t.Run("OK", func(t *testing.T) { - if min, max, err := ltx.ParseFilename("0000000000000001-00000000000003e8.ltx"); err != nil { + if minTXID, maxTXID, err := ltx.ParseFilename("0000000000000001-00000000000003e8.ltx"); err != nil { t.Fatal(err) - } else if got, want := min, ltx.TXID(1); got != want { + } else if got, want := minTXID, ltx.TXID(1); got != want { t.Fatalf("min=%d, want %d", got, want) - } else if got, want := max, ltx.TXID(1000); got != want { + } else if got, want := maxTXID, ltx.TXID(1000); got != want { t.Fatalf("max=%d, want %d", got, want) } }) @@ -689,7 +689,7 @@ func BenchmarkChecksumPage(b *testing.B) { func benchmarkChecksumPage(b *testing.B, pageSize int) { data := make([]byte, pageSize) - _, _ = rand.Read(data) + _, _ = crand.Read(data) b.ReportAllocs() b.SetBytes(int64(pageSize)) b.ResetTimer() @@ -709,7 +709,7 @@ func BenchmarkChecksumPageWithHasher(b *testing.B) { func benchmarkChecksumPageWithHasher(b *testing.B, pageSize int) { data := make([]byte, pageSize) - _, _ = rand.Read(data) + _, _ = crand.Read(data) b.ReportAllocs() b.SetBytes(int64(pageSize)) b.ResetTimer() @@ -728,7 +728,7 @@ func BenchmarkXOR(b *testing.B) { m := make(map[uint32]ltx.Checksum) page := make([]byte, pageSize) for pgno := uint32(1); pgno <= pageN; pgno++ { - _, _ = rand.Read(page) + _, _ = crand.Read(page) m[pgno] = ltx.ChecksumPage(pgno, page) } b.SetBytes(int64(pageN * pageSize))