Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/borgstore/backends/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,11 @@ def info(self, name) -> ItemInfo:

@abstractmethod
def load(self, name: str, *, size=None, offset=0) -> bytes:
"""load value from <name>"""
"""load value from <name>

If offset is negative, it is counted from the end of the file.
If size is None, the whole object starting at offset is loaded.
"""

@abstractmethod
def store(self, name: str, value: bytes) -> None:
Expand Down
59 changes: 59 additions & 0 deletions src/borgstore/backends/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""
Utilities for backend implementations.
"""

from typing import Tuple, Optional


def make_range_header(offset: int, size: Optional[int] = None, total_size: Optional[int] = None) -> Optional[str]:
    """
    Generate a standards compliant HTTP Range header.

    :param offset: offset in bytes. If negative, it is counted from the end of the file.
    :param size: number of bytes to load. If None, load until the end of the file.
                 Expected to be positive when given; it is not validated here.
    :param total_size: total size of the file. Required if offset < 0 and size is not None,
                       ignored otherwise.
    :return: Range header value (e.g., "bytes=0-99") or None if no Range header is needed.
    :raises ValueError: if offset < 0 and size is given, but total_size is None.
    """
    if offset >= 0:
        if size is not None:
            # HTTP byte ranges are inclusive on both ends, hence the "- 1".
            return f"bytes={offset}-{offset + size - 1}"
        # Offset 0 without a size means "the whole object": no header needed.
        return f"bytes={offset}-" if offset > 0 else None

    if size is None:
        # Suffix range: offset is negative, so this renders as "bytes=-N" (the last N bytes).
        return f"bytes={offset}"

    # HTTP cannot express "N bytes starting M bytes before the end" directly,
    # so the negative offset has to be resolved to an absolute position.
    if total_size is None:
        raise ValueError("total_size is required for negative offset with a specific size")
    # Clamp to the start of the file: if the requested tail is longer than the file,
    # a plain suffix range would select the whole file, so start at byte 0 here as well
    # instead of emitting a malformed header with a negative start position.
    start = max(total_size + offset, 0)
    return f"bytes={start}-{start + size - 1}"


def parse_range_header(range_header: Optional[str]) -> Tuple[int, Optional[int]]:
    """
    Parse a standards compliant HTTP Range header.
    Only supports "bytes" unit and single range specs.

    Anything that cannot be parsed (missing or empty header, other units, multiple
    ranges, non-numeric positions, an end position before the start position) is
    ignored and treated like an absent header.

    :param range_header: Range header value (e.g., "bytes=0-99", "bytes=100-", "bytes=-500").
    :return: A tuple (offset, size). offset is negative for suffix ranges.
             size is None if the range is open-ended. (0, None) means "whole object"
             and is also the result for an ignored header.
    """
    prefix = "bytes="
    if not range_header or not range_header.startswith(prefix):
        return 0, None

    range_val = range_header[len(prefix):]
    try:
        if range_val.startswith("-"):
            # bytes=-SUFFIX: keep the sign, so the offset counts from the end.
            return int(range_val), None
        # bytes=OFFSET- or bytes=OFFSET-END
        # Unpacking fails with ValueError if there is no "-" or more than one.
        start_str, end_str = range_val.split("-")
        offset = int(start_str)
        if not end_str:
            return offset, None
        # END is inclusive, so the byte count is one more than the difference.
        size = int(end_str) - offset + 1
    except ValueError:
        return 0, None

    if size < 1:
        # END before OFFSET is an invalid range: ignore it rather than handing
        # a negative size to the caller.
        return 0, None
    return offset, size
4 changes: 2 additions & 2 deletions src/borgstore/backends/posixfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,8 @@ def load(self, name, *, size=None, offset=0):
self._check_permission(name, "r")
try:
with path.open("rb") as f:
if offset > 0:
f.seek(offset)
if offset != 0:
f.seek(offset, os.SEEK_SET if offset >= 0 else os.SEEK_END)
return f.read(-1 if size is None else size)
except FileNotFoundError:
raise ObjectNotFound(name) from None
Expand Down
21 changes: 16 additions & 5 deletions src/borgstore/backends/rclone.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
requests = None

from ._base import BackendBase, ItemInfo, validate_name
from ._utils import make_range_header
from .errors import (
BackendError,
BackendDoesNotExist,
Expand Down Expand Up @@ -258,13 +259,23 @@ def load(self, name: str, *, size=None, offset=0) -> bytes:
"""Load value from <name>."""
validate_name(name)
headers = {}
if size is not None or offset > 0:
if size is not None:
headers["Range"] = f"bytes={offset}-{offset+size-1}"
if offset < 0 and size is not None:
if -offset - size <= 1024:
# Optimization: if the part of the tail we don't need is small,
# we just request the last N bytes and truncate locally.
range_header = make_range_header(offset, size=None)
else:
headers["Range"] = f"bytes={offset}-"
info = self.info(name)
range_header = make_range_header(offset, size, info.size)
else:
range_header = make_range_header(offset, size)
if range_header:
headers["Range"] = range_header
r = self._requests(requests.get, f"{self.url}[{self.fs}]/{name}", tries=self.TRIES, headers=headers)
return r.content
content = r.content
if offset < 0 and size is not None and size < len(content):
content = content[:size]
return content

def store(self, name: str, value: bytes) -> None:
"""Store <value> into <name>."""
Expand Down
22 changes: 18 additions & 4 deletions src/borgstore/backends/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
requests = HTTPBasicAuth = None

from ._base import BackendBase, ItemInfo, validate_name
from ._utils import make_range_header
from .errors import (
ObjectNotFound,
BackendAlreadyExists,
Expand Down Expand Up @@ -169,14 +170,27 @@ def load(self, name: str, *, size=None, offset=0) -> bytes:
self._assert_open()
validate_name(name)

r_hdr = (None if not offset else f"bytes={offset}-") if size is None else f"bytes={offset}-{offset + size - 1}"
if offset < 0 and size is not None:
if -offset - size <= 1024:
# Optimization: if the part of the tail we don't need is small,
# we just request the last N bytes and truncate locally.
range_header = make_range_header(offset, size=None)
else:
info = self.info(name)
range_header = make_range_header(offset, size, info.size)
else:
range_header = make_range_header(offset, size)

headers = self.headers.copy()
if r_hdr:
headers["Range"] = r_hdr
if range_header:
headers["Range"] = range_header

response = self._request("get", self._url(name), headers=headers)
self._handle_response(response, name)
return response.content
content = response.content
if offset < 0 and size is not None and size < len(content):
content = content[:size]
return content

def store(self, name: str, value: bytes) -> None:
self._assert_open()
Expand Down
32 changes: 19 additions & 13 deletions src/borgstore/backends/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import urllib.parse

from ._base import BackendBase, ItemInfo, validate_name
from ._utils import make_range_header
from .errors import BackendError, BackendMustBeOpen, BackendMustNotBeOpen, BackendDoesNotExist, BackendAlreadyExists
from .errors import ObjectNotFound

Expand Down Expand Up @@ -187,20 +188,25 @@ def load(self, name, *, size=None, offset=0):
validate_name(name)
key = self.base_path + name
try:
if size is None and offset == 0:
if offset < 0 and size is not None:
if -offset - size <= 1024:
# Optimization: if the part of the tail we don't need is small,
# we just request the last N bytes and truncate locally.
range_header = make_range_header(offset, size=None)
else:
info = self.info(name)
range_header = make_range_header(offset, size, info.size)
else:
range_header = make_range_header(offset, size)

if range_header:
obj = self.s3.get_object(Bucket=self.bucket, Key=key, Range=range_header)
else:
obj = self.s3.get_object(Bucket=self.bucket, Key=key)
return obj["Body"].read()
elif size is not None and offset == 0:
obj = self.s3.get_object(Bucket=self.bucket, Key=key, Range=f"bytes=0-{size - 1}")
return obj["Body"].read()
elif size is None and offset != 0:
head = self.s3.head_object(Bucket=self.bucket, Key=key)
length = head["ContentLength"]
obj = self.s3.get_object(Bucket=self.bucket, Key=key, Range=f"bytes={offset}-{length - 1}")
return obj["Body"].read()
elif size is not None and offset != 0:
obj = self.s3.get_object(Bucket=self.bucket, Key=key, Range=f"bytes={offset}-{offset + size - 1}")
return obj["Body"].read()
content = obj["Body"].read()
if offset < 0 and size is not None and size < len(content):
content = content[:size]
return content
except self.s3.exceptions.NoSuchKey:
raise ObjectNotFound(name)

Expand Down
2 changes: 1 addition & 1 deletion src/borgstore/backends/sftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def load(self, name, *, size=None, offset=0):
validate_name(name)
try:
with self.client.open(name) as f:
f.seek(offset)
f.seek(offset, 0 if offset >= 0 else 2)
f.prefetch(size) # speeds up the following read() significantly!
return f.read(size)
except FileNotFoundError:
Expand Down
14 changes: 2 additions & 12 deletions src/borgstore/server/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
BackendMustBeOpen,
BackendMustNotBeOpen,
)
from ..backends._utils import parse_range_header
from ..store import get_backend

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -251,18 +252,7 @@ def do_GET(self):

try:
range_header = self.headers.get("Range")
offset = 0
size = None
if range_header and range_header.startswith("bytes="):
# Simple Range: bytes=OFFSET- or bytes=OFFSET-END
try:
range_val = range_header.split("=")[1]
start_str, end_str = range_val.split("-")
offset = int(start_str)
if end_str:
size = int(end_str) - offset + 1
except ValueError:
pass
offset, size = parse_range_header(range_header) if range_header else (0, None)

with self.server.backend:
data = self.server.backend.load(self.name, offset=offset, size=size)
Expand Down
14 changes: 14 additions & 0 deletions tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,20 @@ def test_load_partial(tested_backends, request):
assert backend.load("key", size=3) == b"012"
assert backend.load("key", offset=5) == b"56789"
assert backend.load("key", offset=4, size=4) == b"4567"
assert backend.load("key", offset=-3) == b"789"
assert backend.load("key", offset=-4, size=2) == b"67"

# test tail loading optimization (-offset - size <= 1024)
# offset=-10, size=9 -> -offset - size = 1 <= 1024 -> optimized
assert backend.load("key", offset=-10, size=9) == b"012345678"
# offset=-10, size=1 -> -offset - size = 9 <= 1024 -> optimized
assert backend.load("key", offset=-10, size=1) == b"0"
# offset=-2000, size=1000 -> -offset - size = 1000 <= 1024 -> optimized
backend.store("large", b"x" * 2000)
assert backend.load("large", offset=-2000, size=1000) == b"x" * 1000
# offset=-3000, size=1000 -> -offset - size = 2000 > 1024 -> NOT optimized
backend.store("huge", b"y" * 3000)
assert backend.load("huge", offset=-3000, size=1000) == b"y" * 1000


def test_already_exists(tested_backends, request):
Expand Down
30 changes: 30 additions & 0 deletions tests/test_backends_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from borgstore.backends._utils import make_range_header, parse_range_header
import pytest


def test_make_range_header():
    """make_range_header builds the expected header for forward and tail requests."""
    # Offset 0 with no size is the whole object, so no header is produced.
    assert make_range_header(0) is None

    expected_by_args = [
        # counted from the start
        ((100,), "bytes=100-"),
        ((100, 50), "bytes=100-149"),
        ((0, 50), "bytes=0-49"),
        # counted from the end
        ((-100,), "bytes=-100"),
        ((-100, 50, 1000), "bytes=900-949"),
    ]
    for args, expected in expected_by_args:
        assert make_range_header(*args) == expected

    # A sized tail request cannot be resolved without the total size.
    with pytest.raises(ValueError):
        make_range_header(-100, 50)


def test_parse_range_header():
    """parse_range_header maps header values to (offset, size) tuples."""
    whole_object = (0, None)
    # Missing or unparsable headers fall back to "whole object".
    for unusable in (None, "", "invalid", "bytes=invalid"):
        assert parse_range_header(unusable) == whole_object

    parsed_by_header = {
        # open-ended and closed ranges counted from the start
        "bytes=100-": (100, None),
        "bytes=100-149": (100, 50),
        "bytes=0-49": (0, 50),
        # suffix range counted from the end
        "bytes=-100": (-100, None),
    }
    for header, parsed in parsed_by_header.items():
        assert parse_range_header(header) == parsed
Loading