Skip to content
This repository was archived by the owner on Nov 2, 2021. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct itemx {
uint8_t md[20]; /* sha1 message digest */
uint32_t sid; /* owner slab id */
uint32_t offset; /* item offset from owner slab base */
rel_time_t expiry; /* expiry in secs */
uint64_t cas; /* cas */
} __attribute__ ((__packed__));
```
Expand All @@ -54,9 +55,9 @@ Each index entry contains both object-specific information (key name, &c.) and d

To further reduce the memory consumed by the index, we store the SHA-1 hash of the key in each index entry, instead of the key itself. The SHA-1 hash acts as the unique identifier for each object. The on-disk object format contains the complete object key and value. False positives from SHA-1 hash collisions are detected after object retrieval from the disk by comparison with the requested key. If there are collisions on the write path, new objects with the same hash key simply overwrite previous objects.

The index entry (struct itemx) on a 64-bit system is 44 bytes in size. It is possible to further reduce index entry size to 28 bytes, if CAS is unsupported, MD5 hashing is used, and the next pointer is reduced to 4 bytes.
The index entry (struct itemx) on a 64-bit system is 48 bytes in size. It is possible to further reduce index entry size to 32 bytes, if CAS is unsupported, MD5 hashing is used, and the next pointer is reduced to 4 bytes.

At this point, it is instructive to consider the relative size of fatcache's index and the on-disk data. With a 44 byte index entry, an index consuming 44 MB of memory can address 1M objects. If the average object size is 1 KB, then a 44 MB index can address 1 GB of on-disk storage - a 23x memory overcommit. If the average object size is 500 bytes, then a 44 MB index can address 500 MB of SSD - a 11x memory overcommit. Index size and object size relate in this way to determine the addressable capacity of the SSD.
At this point, it is instructive to consider the relative size of fatcache's index and the on-disk data. With a 48 byte index entry, an index consuming 48 MB of memory can address 1M objects. If the average object size is 1 KB, then a 48 MB index can address 1 GB of on-disk storage - a 21x memory overcommit. If the average object size is 500 bytes, then a 48 MB index can address 500 MB of SSD - a 10x memory overcommit. Index size and object size relate in this way to determine the addressable capacity of the SSD.

## Build

Expand Down
22 changes: 2 additions & 20 deletions src/fc_item.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,6 @@ extern struct settings settings;

static uint64_t cas_id;

/*
 * Report whether item it is past its expiry time.
 *
 * An expiry of 0 marks the item as unexpirable, so false is returned
 * without consulting the clock. Otherwise the item counts as expired
 * once its expiry is strictly less than time_now().
 *
 * Note the side effect: for an expired item, itemx_removex() is called
 * with the item's hash and message digest before true is returned.
 */
bool
item_expired(struct item *it)
{
    ASSERT(it->magic == ITEM_MAGIC);

    /* expiry of 0 means the item never expires */
    if (it->expiry == 0) {
        return false;
    }

    /* expiry has not yet fallen behind the current time */
    if (it->expiry >= time_now()) {
        return false;
    }

    itemx_removex(it->hash, it->md);

    return true;
}

/*
* Return the owner slab of item it.
*/
Expand Down Expand Up @@ -96,7 +79,6 @@ item_get(uint8_t *key, uint8_t nkey, uint8_t cid, uint32_t ndata,
it->cid = cid;
it->nkey = nkey;
it->ndata = ndata;
it->expiry = expiry;
it->flags = flags;
fc_memcpy(it->md, md, sizeof(it->md));
it->hash = hash;
Expand All @@ -105,9 +87,9 @@ item_get(uint8_t *key, uint8_t nkey, uint8_t cid, uint32_t ndata,

log_debug(LOG_VERB, "get it '%.*s' at offset %"PRIu32" with cid %"PRIu8
" expiry %u", it->nkey, item_key(it), it->offset, it->cid,
it->expiry);
expiry);

itemx_putx(it->hash, it->md, it->sid, it->offset, ++cas_id);
itemx_putx(it->hash, it->md, it->sid, it->offset, expiry, ++cas_id);

return it;
}
Expand Down
2 changes: 0 additions & 2 deletions src/fc_item.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ struct item {
uint8_t unused[2]; /* unused */
uint8_t nkey; /* key length */
uint32_t ndata; /* date length */
rel_time_t expiry; /* expiry in secs */
uint32_t flags; /* flags opaque to the server */
uint8_t md[20]; /* key message digest */
uint32_t hash; /* key hash */
Expand Down Expand Up @@ -88,7 +87,6 @@ item_data(struct item *it)
return it->end + it->nkey;
}

bool item_expired(struct item *it);
struct slab *item_to_slab(struct item *it);
uint8_t item_slabcid(uint8_t nkey, uint32_t ndata);

Expand Down
23 changes: 22 additions & 1 deletion src/fc_itemx.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,26 @@ static struct itemx_tqh free_itemxq; /* free itemx q */
static struct itemx *istart; /* itemx memory start */
static struct itemx *iend; /* itemx memory end */

/*
 * Report whether the index entry itx is past its expiry time.
 *
 * An expiry of 0 marks the entry as unexpirable, so false is returned
 * without consulting the clock. Otherwise the entry counts as expired
 * once its expiry is strictly less than time_now().
 *
 * Note the side effect: for an expired entry, the hash is recomputed
 * from the stored message digest with sha1_hash() and itemx_removex()
 * is called with that hash and digest before true is returned.
 */
bool
itemx_expired(struct itemx *itx)
{
    uint32_t md_hash;

    ASSERT(itx != NULL);

    /* expiry of 0 means the entry never expires */
    if (itx->expiry == 0) {
        return false;
    }

    /* expiry has not yet fallen behind the current time */
    if (itx->expiry >= time_now()) {
        return false;
    }

    md_hash = sha1_hash(itx->md);
    itemx_removex(md_hash, itx->md);

    return true;
}

/*
* Returns true, if there are no free item indexes, otherwise
* return false.
Expand Down Expand Up @@ -178,7 +198,7 @@ itemx_getx(uint32_t hash, uint8_t *md)

void
itemx_putx(uint32_t hash, uint8_t *md, uint32_t sid, uint32_t offset,
uint64_t cas)
rel_time_t expiry, uint64_t cas)
{
struct itemx *itx;
struct itemx_tqh *bucket;
Expand All @@ -188,6 +208,7 @@ itemx_putx(uint32_t hash, uint8_t *md, uint32_t sid, uint32_t offset,
itx = itemx_get();
itx->sid = sid;
itx->offset = offset;
itx->expiry = expiry;
itx->cas = cas;
fc_memcpy(itx->md, md, sizeof(itx->md));

Expand Down
4 changes: 3 additions & 1 deletion src/fc_itemx.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct itemx {
uint8_t md[20]; /* sha1 message digest */
uint32_t sid; /* owner slab id */
uint32_t offset; /* item offset from owner slab base */
rel_time_t expiry; /* expiry in secs */
uint64_t cas; /* cas */
} __attribute__ ((__packed__));

Expand All @@ -34,8 +35,9 @@ rstatus_t itemx_init(void);
void itemx_deinit(void);

bool itemx_empty(void);
bool itemx_expired(struct itemx *itx);
struct itemx *itemx_getx(uint32_t hash, uint8_t *md);
void itemx_putx(uint32_t hash, uint8_t *md, uint32_t sid, uint32_t ioff, uint64_t cas);
void itemx_putx(uint32_t hash, uint8_t *md, uint32_t sid, uint32_t ioff, rel_time_t expiry, uint64_t cas);
bool itemx_removex(uint32_t hash, uint8_t *md);

#endif
41 changes: 10 additions & 31 deletions src/fc_request.c
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ req_process_get(struct context *ctx, struct conn *conn, struct msg *msg)
return;
}

if (itemx_expired(itx)) {
rsp_send_status(ctx, conn, msg, MSG_RSP_NOT_FOUND);
return;
}
/*
* On a hit, we read the item with address [sid, offset] and respond
* with item value if the item hasn't expired yet.
Expand All @@ -199,10 +203,6 @@ req_process_get(struct context *ctx, struct conn *conn, struct msg *msg)
rsp_send_error(ctx, conn, msg, MSG_RSP_SERVER_ERROR, errno);
return;
}
if (item_expired(it)) {
rsp_send_status(ctx, conn, msg, MSG_RSP_NOT_FOUND);
return;
}

rsp_send_value(ctx, conn, msg, it, itx->cas);
}
Expand Down Expand Up @@ -254,17 +254,12 @@ static void
req_process_add(struct context *ctx, struct conn *conn, struct msg *msg)
{
struct itemx *itx;
struct item *it;

/* add, adds only if the mapping is not present */
itx = itemx_getx(msg->hash, msg->md);
if (itx != NULL) {
it = slab_read_item(itx->sid, itx->offset);
/* if the item hasn't expired yet */
if(!item_expired(it)) {
rsp_send_status(ctx, conn, msg, MSG_RSP_NOT_STORED);
return;
}
if (itx != NULL && !itemx_expired(itx)) {
rsp_send_status(ctx, conn, msg, MSG_RSP_NOT_STORED);
return;
}

req_process_set(ctx, conn, msg);
Expand All @@ -274,18 +269,10 @@ static void
req_process_replace(struct context *ctx, struct conn *conn, struct msg *msg)
{
struct itemx *itx;
struct item *it;

/* replace, only replaces if the mapping is present */
itx = itemx_getx(msg->hash, msg->md);
if (itx == NULL) {
rsp_send_status(ctx, conn, msg, MSG_RSP_NOT_STORED);
return;
}

/* if the item has expired */
it = slab_read_item(itx->sid, itx->offset);
if(item_expired(it)) {
if (itx == NULL || itemx_expired(itx)) {
rsp_send_status(ctx, conn, msg, MSG_RSP_NOT_STORED);
return;
}
Expand Down Expand Up @@ -339,7 +326,7 @@ req_process_concat(struct context *ctx, struct conn *conn, struct msg *msg)

/* 1). look up existing itemx */
itx = itemx_getx(msg->hash, msg->md);
if (itx == NULL) {
if (itx == NULL || itemx_expired(itx)) {
/* 2a). miss -> return NOT_STORED */
rsp_send_status(ctx, conn, msg, MSG_RSP_NOT_STORED);
return;
Expand All @@ -351,10 +338,6 @@ req_process_concat(struct context *ctx, struct conn *conn, struct msg *msg)
rsp_send_error(ctx, conn, msg, MSG_RSP_SERVER_ERROR, errno);
return;
}
if (item_expired(oit)) {
rsp_send_status(ctx, conn, msg, MSG_RSP_NOT_STORED);
return;
}

ndata = msg->vlen + oit->ndata;
cid = item_slabcid(nkey, ndata);
Expand Down Expand Up @@ -410,7 +393,7 @@ req_process_num(struct context *ctx, struct conn *conn, struct msg *msg)

/* 1). look up existing itemx */
itx = itemx_getx(msg->hash, msg->md);
if (itx == NULL) {
if (itx == NULL || itemx_expired(itx)) {
/* 2a). miss -> return NOT_FOUND */
rsp_send_status(ctx, conn, msg, MSG_RSP_NOT_FOUND);
return;
Expand All @@ -422,10 +405,6 @@ req_process_num(struct context *ctx, struct conn *conn, struct msg *msg)
rsp_send_error(ctx, conn, msg, MSG_RSP_SERVER_ERROR, errno);
return;
}
if (item_expired(it)) {
rsp_send_status(ctx, conn, msg, MSG_RSP_NOT_FOUND);
return;
}

/* 3). sanity check item data to be a number */
status = fc_atou64(item_data(it), it->ndata, &cnum);
Expand Down