Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions c_src/riak_ensemble_clock.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
/********************************************************************
*
* Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
*
* This file is provided to you under the Apache License,
* Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
********************************************************************/
#include "erl_nif.h"

#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include <stdint.h>

#if defined(__MACH__) && defined(__APPLE__)
#include <mach/mach.h>
#include <mach/mach_time.h>
#endif

/* Atom terms returned by the NIFs; both are assigned in init(). */
static ERL_NIF_TERM ATOM_OK;
static ERL_NIF_TERM ATOM_ERROR;

#if defined(__MACH__) && defined(__APPLE__)
/* Mach timebase (numer/denom) used by osx_monotonic_time() to scale
   mach_absolute_time() readings; filled in by init(). */
static mach_timebase_info_data_t timebase_info;
#endif

/*********************************************************************/

#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0)
/*
 * Read the POSIX clock `clock` and return its value in nanoseconds.
 * Returns 0 when clock_gettime() fails; callers treat 0 as "no reading".
 */
uint64_t posix_get_clock(clockid_t clock)
{
    struct timespec now;
    uint64_t nanos;

    if (clock_gettime(clock, &now) == -1) {
        return 0;
    }

    /* Widen the seconds before scaling so the multiply happens in 64 bits. */
    nanos = (uint64_t)now.tv_sec * 1000000000;
    nanos += now.tv_nsec;
    return nanos;
}

/* Note: Prefer CLOCK_BOOTTIME on Linux where supported, as this
includes time spent in suspend. CLOCK_MONOTONIC may or may
not include time spent in suspend -- it's CPU dependent. In
practice, this shouldn't matter -- people don't typically
suspend/resume production servers while under client load.
Likewise, client TCP connections are unlikely to survive
across reasonable suspend durations.
*/

/*
 * Return a monotonic timestamp in nanoseconds, or 0 if no clock could be
 * read.
 *
 * CLOCK_BOOTTIME is tried first where the headers define it. The macro
 * being defined at compile time does not guarantee that the running kernel
 * accepts the clock id, so a failed read falls through to CLOCK_MONOTONIC
 * instead of reporting failure outright.
 */
uint64_t posix_monotonic_time(void)
{
    uint64_t time = 0;

#if defined(CLOCK_BOOTTIME)
    time = posix_get_clock(CLOCK_BOOTTIME);
    if (time != 0)
        return time;
#endif

#if defined(CLOCK_MONOTONIC)
    time = posix_get_clock(CLOCK_MONOTONIC);
#endif

    return time;
}
#endif

/*********************************************************************
* See Apple technical note: *
* https://developer.apple.com/library/mac/qa/qa1398/_index.html *
*********************************************************************/

/* Note: mach_absolute_time() is based on the CPU timestamp counter,
which is synchronized across all CPUs since Intel Nehalem.
Earlier CPUs do not provide this guarantee. It's unclear if
Apple provides any correction for this behavior on older CPUs.
We assume this doesn't matter in practice -- people don't use
ancient OS X machines as production servers.
*/

#if defined(__MACH__) && defined(__APPLE__)
/*
 * Return the Mach absolute time converted to nanoseconds, or 0 when the
 * timebase is unusable (denom == 0, e.g. it was never filled in).
 *
 * The conversion is ticks * numer / denom. Computing it in that order can
 * overflow 64 bits in the multiplication, so the quotient and remainder of
 * ticks / denom are scaled separately:
 *
 *   (ticks / denom) * numer + ((ticks % denom) * numer) / denom
 *
 * numer and denom are 32-bit values widened to 64 bits here, so the
 * remainder term (both factors < 2^32) cannot overflow.
 */
uint64_t osx_monotonic_time(void)
{
    uint64_t ticks = mach_absolute_time();
    uint64_t numer = timebase_info.numer;
    uint64_t denom = timebase_info.denom;

    /* Avoid dividing by zero; 0 is reported as an error by the NIFs. */
    if (denom == 0)
        return 0;

    return (ticks / denom) * numer + ((ticks % denom) * numer) / denom;
}
#endif

/*********************************************************************/

/*
 * Return a monotonic timestamp in nanoseconds from whichever platform
 * clock was compiled in, or 0 when none is available or the read failed.
 *
 * The POSIX clock is used when present. The Mach clock is consulted only
 * when there is no POSIX reading, so a usable Mach value is never computed
 * and then thrown away.
 */
static uint64_t get_monotonic_time()
{
    uint64_t time = 0;

#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0)
    time = posix_monotonic_time();
#endif

#if defined(__MACH__) && defined(__APPLE__)
    if (time == 0)
        time = osx_monotonic_time();
#endif

    return time;
}

/*********************************************************************/

/*
 * NIF behind riak_ensemble_clock:monotonic_time/0.
 * Returns {ok, Nanoseconds}, or the atom `error` when the clock reading
 * is 0.
 */
static ERL_NIF_TERM monotonic_time(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    uint64_t nanos = get_monotonic_time();

    if (nanos == 0)
        return ATOM_ERROR;

    return enif_make_tuple2(env, ATOM_OK, enif_make_uint64(env, nanos));
}

/*********************************************************************/

/*
 * NIF behind riak_ensemble_clock:monotonic_time_ms/0.
 * Returns {ok, Milliseconds}, or the atom `error` when the clock could not
 * be read.
 *
 * The failure check is made on the raw nanosecond reading, before the
 * division, so a valid reading below one millisecond yields {ok, 0}
 * instead of being mistaken for a failed read.
 */
static ERL_NIF_TERM monotonic_time_ms(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    uint64_t nanos = get_monotonic_time();

    if (nanos == 0)
        return ATOM_ERROR;

    return enif_make_tuple2(env, ATOM_OK, enif_make_uint64(env, nanos / 1000000));
}

/*********************************************************************/

/*
 * Shared setup for load and upgrade: capture the Mach timebase where that
 * platform applies, and create the atoms the NIFs return.
 */
static void init(ErlNifEnv *env)
{
#if defined(__MACH__) && defined(__APPLE__)
    /* Return value deliberately ignored. */
    (void) mach_timebase_info(&timebase_info);
#endif

    ATOM_ERROR = enif_make_atom(env, "error");
    ATOM_OK = enif_make_atom(env, "ok");
}

/* NIF load callback: run the shared setup and report success (0). */
static int on_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
{
    const int load_ok = 0;

    init(env);
    return load_ok;
}

/* NIF upgrade callback: redo the shared setup and report success (0). */
static int on_upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data,
                      ERL_NIF_TERM load_info)
{
    const int upgrade_ok = 0;

    init(env);
    return upgrade_ok;
}

/* NIF unload callback: intentionally does nothing. */
static void on_unload(ErlNifEnv *env, void *priv_data)
{
    return;
}

/*********************************************************************/

/* NIF table: Erlang function name, arity, and the C function that
   implements it. Both entries take no arguments. */
static ErlNifFunc nif_funcs[] = {
{"monotonic_time", 0, monotonic_time},
{"monotonic_time_ms", 0, monotonic_time_ms}
};

/* Register the table for module riak_ensemble_clock with the load,
   upgrade and unload callbacks; the reload slot is left NULL. */
ERL_NIF_INIT(riak_ensemble_clock, nif_funcs, &on_load, NULL, &on_upgrade, &on_unload)
6 changes: 6 additions & 0 deletions rebar.config
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,9 @@
{xref_checks, [undefined_function_calls]}.
{deps, [{lager, "2.0.3", {git, "git://github.com/basho/lager.git", {tag, "2.0.3"}}},
{eleveldb, ".*", {git, "git://github.com/basho/eleveldb.git", {branch, "develop"}}}]}.

%% Native code build: compile every source matching c_src/*.c* into
%% priv/riak_ensemble.so. The ".*" pattern applies the spec to all
%% architectures, and CFLAGS is passed through from the environment.
{port_specs,
[{".*", "priv/riak_ensemble.so",
["c_src/*.c*"],
[{env, [{"CFLAGS", "$CFLAGS"}]}]
}]}.
42 changes: 42 additions & 0 deletions src/riak_ensemble_clock.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(riak_ensemble_clock).
-on_load(init/0).
-export([monotonic_time/0, monotonic_time_ms/0]).

%% @doc
%% NIF stub, replaced by the native implementation once init/0 has loaded
%% the shared library. If the library is not loaded, calling it raises
%% `{error, not_loaded}' through erlang:nif_error/1.
monotonic_time() ->
erlang:nif_error({error, not_loaded}).

%% @doc
%% NIF stub for the millisecond variant, replaced by the native
%% implementation once init/0 has loaded the shared library. If the library
%% is not loaded, calling it raises `{error, not_loaded}' through
%% erlang:nif_error/1.
monotonic_time_ms() ->
erlang:nif_error({error, not_loaded}).

%% Load the NIF library for this module (runs via -on_load).
%% The library is looked up in the riak_ensemble application's priv
%% directory; when that application is unknown to the code server, the path
%% is derived from this module's beam file location, or failing that from
%% the relative "../priv" directory.
init() ->
    erlang:load_nif(nif_path(code:priv_dir(riak_ensemble)), 0).

%% Turn the result of code:priv_dir/1 into the library path (no extension).
nif_path({error, bad_name}) ->
    nif_path_from_beam(code:which(?MODULE));
nif_path(Dir) ->
    filename:join(Dir, "riak_ensemble").

%% Fallback path when the priv directory cannot be resolved by name.
nif_path_from_beam(Filename) when is_list(Filename) ->
    filename:join([filename:dirname(Filename), "../priv", "riak_ensemble"]);
nif_path_from_beam(_) ->
    filename:join("../priv", "riak_ensemble").
17 changes: 15 additions & 2 deletions src/riak_ensemble_config.erl
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,25 @@
tick() ->
%% Looks up `ensemble_tick' through get_env/2, passing 500 as the default.
%% NOTE(review): presumably milliseconds -- confirm against the callers.
get_env(ensemble_tick, 500).

%% @doc
%% The leader lease duration. Should be greater than the leader tick to give
%% the leader time to refresh before expiration, but lower than the follower
%% timeout. The default is one and a half ticks: a lease shorter than the
%% tick would expire before the leader's next refresh.
lease() ->
    get_env(lease_duration, tick() * 3 div 2).

%% @doc
%% Whether leader leases are trusted. When they are, a leader whose lease
%% has not yet expired may answer reads without contacting remote peers.
%% Leases are trusted unless configured otherwise.
trust_lease() ->
    Default = true,
    get_env(trust_lease, Default).

%% @doc
%% The follower timeout determines how long a follower waits to hear from
%% the leader before abandoning it. Defaults to four lease durations, which
%% keeps it above the lease as the lease setting requires.
follower_timeout() ->
    get_env(follower_timeout, lease() * 4).

%% @doc
%% The election timeout used for randomized election.
Expand Down Expand Up @@ -93,4 +107,3 @@ get_env(Key, Default) ->
{_, Val} ->
Val
end.

Loading