-
Notifications
You must be signed in to change notification settings - Fork 4k
[WIP] Integrate in-memory object store into arrow #467
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4b4d203
09da8ad
ae37e37
04ebcba
26a370a
2562b6d
74b68c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,114 @@ | ||
| cmake_minimum_required(VERSION 2.8) | ||
|
|
||
| project(plasma) | ||
|
|
||
| add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/common/) | ||
|
|
||
| # Recursively include common | ||
| include(${CMAKE_CURRENT_LIST_DIR}/cmake/Common.cmake) | ||
|
|
||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") | ||
|
|
||
| if(APPLE) | ||
| SET(CMAKE_SHARED_LIBRARY_SUFFIX ".so") | ||
| endif(APPLE) | ||
|
|
||
| include_directories("${PYTHON_INCLUDE_DIRS}" thirdparty) | ||
|
|
||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L") | ||
|
|
||
| # Compile flatbuffers | ||
|
|
||
| set(PLASMA_FBS_SRC "${CMAKE_CURRENT_LIST_DIR}/format/plasma.fbs") | ||
| set(OUTPUT_DIR ${CMAKE_CURRENT_LIST_DIR}/format/) | ||
|
|
||
| add_custom_target(gen_plasma_fbs ALL) | ||
|
|
||
| add_custom_command( | ||
| TARGET gen_plasma_fbs | ||
| # The --gen-object-api flag generates a C++ class MessageT for each | ||
| # flatbuffers message Message, which can be used to store deserialized | ||
| # messages in data structures. This is currently used for ObjectInfo for | ||
| # example. | ||
| COMMAND ${FLATBUFFERS_COMPILER} -c -o ${OUTPUT_DIR} ${PLASMA_FBS_SRC} --gen-object-api | ||
| DEPENDS ${PLASMA_FBS_SRC} | ||
| COMMENT "Running flatc compiler on ${PLASMA_FBS_SRC}" | ||
| VERBATIM | ||
| ) | ||
|
|
||
| add_dependencies(gen_plasma_fbs flatbuffers_ep) | ||
|
|
||
| if(UNIX AND NOT APPLE) | ||
| link_libraries(rt) | ||
| endif() | ||
|
|
||
| include_directories("${CMAKE_CURRENT_LIST_DIR}/") | ||
| include_directories("${CMAKE_CURRENT_LIST_DIR}/common") | ||
| include_directories("${CMAKE_CURRENT_LIST_DIR}/common/thirdparty") | ||
|
|
||
| add_library(plasma SHARED | ||
| plasma.cc | ||
| plasma_extension.cc | ||
| plasma_protocol.cc | ||
| plasma_client.cc | ||
| thirdparty/xxhash.c | ||
| fling.c) | ||
|
|
||
| add_dependencies(plasma gen_plasma_fbs) | ||
|
|
||
| get_filename_component(PYTHON_SHARED_LIBRARY ${PYTHON_LIBRARIES} NAME) | ||
| if(APPLE) | ||
| add_custom_command(TARGET plasma | ||
| POST_BUILD COMMAND ${CMAKE_INSTALL_NAME_TOOL} -change ${PYTHON_SHARED_LIBRARY} ${PYTHON_LIBRARIES} libplasma.so) | ||
| endif(APPLE) | ||
|
|
||
| if(APPLE) | ||
| target_link_libraries(plasma -Wl,-force_load,${FLATBUFFERS_STATIC_LIB} common ${PYTHON_LIBRARIES} ${FLATBUFFERS_STATIC_LIB} -lpthread) | ||
| else(APPLE) | ||
| target_link_libraries(plasma -Wl,--whole-archive ${FLATBUFFERS_STATIC_LIB} -Wl,--no-whole-archive common ${PYTHON_LIBRARIES} ${FLATBUFFERS_STATIC_LIB} -lpthread) | ||
| endif(APPLE) | ||
|
|
||
| include_directories("${FLATBUFFERS_INCLUDE_DIR}") | ||
|
|
||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") | ||
|
|
||
| set_source_files_properties(thirdparty/dlmalloc.c PROPERTIES COMPILE_FLAGS -Wno-all) | ||
|
|
||
| add_executable(plasma_store | ||
| plasma_store.cc | ||
| plasma.cc | ||
| plasma_protocol.cc | ||
| eviction_policy.cc | ||
| fling.c | ||
| malloc.c) | ||
|
|
||
| add_dependencies(plasma_store hiredis gen_plasma_fbs) | ||
|
|
||
| target_link_libraries(plasma_store common ${FLATBUFFERS_STATIC_LIB}) | ||
|
|
||
| add_library(plasma_lib STATIC | ||
| plasma_client.cc | ||
| plasma.cc | ||
| plasma_protocol.cc | ||
| fling.c | ||
| thirdparty/xxhash.c) | ||
|
|
||
| target_link_libraries(plasma_lib common ${FLATBUFFERS_STATIC_LIB} -lpthread) | ||
| add_dependencies(plasma_lib gen_plasma_fbs) | ||
|
|
||
| add_dependencies(plasma protocol_fbs) | ||
|
|
||
| add_library(plasma_client SHARED plasma_client.cc) | ||
| target_link_libraries(plasma_client ${FLATBUFFERS_STATIC_LIB}) | ||
|
|
||
| target_link_libraries(plasma_client common plasma_lib ${FLATBUFFERS_STATIC_LIB}) | ||
|
|
||
| function(define_test test_name library) | ||
| add_executable(${test_name} test/${test_name}.cc ${ARGN}) | ||
| add_dependencies(${test_name} hiredis flatbuffers_ep) | ||
| target_link_libraries(${test_name} common ${FLATBUFFERS_STATIC_LIB} ${library}) | ||
| target_compile_options(${test_name} PUBLIC "-DPLASMA_TEST -DLOCAL_SCHEDULER_TEST -DCOMMON_TEST -DRAY_COMMON_LOG_LEVEL=4 -DRAY_TIMEOUT=50") | ||
| endfunction() | ||
|
|
||
| define_test(client_tests plasma_lib) | ||
| define_test(serialization_tests plasma_lib) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| # Code for compiling flatbuffers | ||
|
|
||
| include(ExternalProject) | ||
| include(CMakeParseArguments) | ||
|
|
||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") | ||
|
|
||
| set(FLATBUFFERS_VERSION "1.6.0") | ||
|
|
||
| set(FLATBUFFERS_PREFIX "${CMAKE_BINARY_DIR}/flatbuffers_ep-prefix/src/flatbuffers_ep-install") | ||
| if (NOT TARGET flatbuffers_ep) | ||
| ExternalProject_Add(flatbuffers_ep | ||
| URL "https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz" | ||
| CMAKE_ARGS | ||
| "-DCMAKE_CXX_FLAGS=-fPIC" | ||
| "-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}" | ||
| "-DFLATBUFFERS_BUILD_TESTS=OFF") | ||
| endif() | ||
|
|
||
| set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_PREFIX}/include") | ||
| set(FLATBUFFERS_STATIC_LIB "${FLATBUFFERS_PREFIX}/lib/libflatbuffers.a") | ||
| set(FLATBUFFERS_COMPILER "${FLATBUFFERS_PREFIX}/bin/flatc") | ||
|
|
||
| message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}") | ||
| message(STATUS "Flatbuffers static library: ${FLATBUFFERS_STATIC_LIB}") | ||
| message(STATUS "Flatbuffers compiler: ${FLATBUFFERS_COMPILER}") | ||
| include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR}) | ||
|
|
||
| # Custom CFLAGS | ||
|
|
||
| set(CMAKE_C_FLAGS "-g -Wall -Wextra -Werror=implicit-function-declaration -Wno-sign-compare -Wno-unused-parameter -Wno-type-limits -Wno-missing-field-initializers --std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -fPIC -std=c99") | ||
|
|
||
| # Code for finding Python | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code for getting cmake to find the right version of Python is the product of lots of trial and error. Eventually it would be good to unify this with pyarrow.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's https://github.com/apache/arrow/blob/master/cpp/cmake_modules/FindPythonLibsNew.cmake, is one or the other made redundant by the other? |
||
|
|
||
| message(STATUS "Trying custom approach for finding Python.") | ||
| # Start off by figuring out which Python executable to use. | ||
| find_program(CUSTOM_PYTHON_EXECUTABLE python) | ||
| message(STATUS "Found Python program: ${CUSTOM_PYTHON_EXECUTABLE}") | ||
| execute_process(COMMAND ${CUSTOM_PYTHON_EXECUTABLE} -c "import sys; print('python' + sys.version[0:3])" | ||
| OUTPUT_VARIABLE PYTHON_LIBRARY_NAME OUTPUT_STRIP_TRAILING_WHITESPACE) | ||
| message(STATUS "PYTHON_LIBRARY_NAME: " ${PYTHON_LIBRARY_NAME}) | ||
| # Now find the Python include directories. | ||
| execute_process(COMMAND ${CUSTOM_PYTHON_EXECUTABLE} -c "from distutils.sysconfig import *; print(get_python_inc())" | ||
| OUTPUT_VARIABLE PYTHON_INCLUDE_DIRS OUTPUT_STRIP_TRAILING_WHITESPACE) | ||
| message(STATUS "PYTHON_INCLUDE_DIRS: " ${PYTHON_INCLUDE_DIRS}) | ||
| # Now find the Python libraries. We'll start by looking near the Python | ||
| # executable. If that fails, then we'll look near the Python include | ||
| # directories. | ||
| execute_process(COMMAND ${CUSTOM_PYTHON_EXECUTABLE} -c "import sys; print(sys.exec_prefix)" | ||
| OUTPUT_VARIABLE PYTHON_PREFIX OUTPUT_STRIP_TRAILING_WHITESPACE) | ||
| message(STATUS "PYTHON_PREFIX: " ${PYTHON_PREFIX}) | ||
| # The name ending in "m" is for miniconda. | ||
| FIND_LIBRARY(PYTHON_LIBRARIES | ||
| NAMES "${PYTHON_LIBRARY_NAME}" "${PYTHON_LIBRARY_NAME}m" | ||
| HINTS "${PYTHON_PREFIX}" | ||
| PATH_SUFFIXES "lib" "libs" | ||
| NO_DEFAULT_PATH) | ||
| message(STATUS "PYTHON_LIBRARIES: " ${PYTHON_LIBRARIES}) | ||
| # If that failed, perhaps because the user is in a virtualenv, search around | ||
| # the Python include directories. | ||
| if(NOT PYTHON_LIBRARIES) | ||
| message(STATUS "Failed to find PYTHON_LIBRARIES near the Python executable, so now looking near the Python include directories.") | ||
| # The name ending in "m" is for miniconda. | ||
| FIND_LIBRARY(PYTHON_LIBRARIES | ||
| NAMES "${PYTHON_LIBRARY_NAME}" "${PYTHON_LIBRARY_NAME}m" | ||
| HINTS "${PYTHON_INCLUDE_DIRS}/../.." | ||
| PATH_SUFFIXES "lib" "libs" | ||
| NO_DEFAULT_PATH) | ||
| message(STATUS "PYTHON_LIBRARIES: " ${PYTHON_LIBRARIES}) | ||
| endif() | ||
| # If we found the Python libraries and the include directories, then continue | ||
| # on. If not, then try find_package as a last resort, but it probably won't | ||
| # work. | ||
| if(PYTHON_LIBRARIES AND PYTHON_INCLUDE_DIRS) | ||
| message(STATUS "The custom approach for finding Python succeeded.") | ||
| SET(PYTHONLIBS_FOUND TRUE) | ||
| else() | ||
| message(WARNING "The custom approach for finding Python failed. Defaulting to find_package.") | ||
| find_package(PythonInterp REQUIRED) | ||
| find_package(PythonLibs ${PYTHON_VERSION_STRING} EXACT REQUIRED) | ||
| set(CUSTOM_PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE}) | ||
| endif() | ||
|
|
||
| message(STATUS "Using CUSTOM_PYTHON_EXECUTABLE: " ${CUSTOM_PYTHON_EXECUTABLE}) | ||
| message(STATUS "Using PYTHON_LIBRARIES: " ${PYTHON_LIBRARIES}) | ||
| message(STATUS "Using PYTHON_INCLUDE_DIRS: " ${PYTHON_INCLUDE_DIRS}) | ||
|
|
||
| # Common libraries | ||
|
|
||
| set(COMMON_LIB "${CMAKE_BINARY_DIR}/src/common/libcommon.a" | ||
| CACHE STRING "Path to libcommon.a") | ||
|
|
||
| include_directories("${CMAKE_CURRENT_LIST_DIR}/../..") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| cmake_minimum_required(VERSION 2.8) | ||
|
|
||
| project(common) | ||
|
|
||
| include(${CMAKE_CURRENT_LIST_DIR}/../cmake/Common.cmake) | ||
|
|
||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") | ||
|
|
||
| include_directories(.) | ||
| include_directories(thirdparty) | ||
|
|
||
| add_library(common STATIC | ||
| event_loop.cc | ||
| common.cc | ||
| io.cc | ||
| thirdparty/ae/ae.c | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Taken from https://github.com/antirez/redis. BSD-3 |
||
| thirdparty/sha256.c) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Taken from here I believe https://github.com/B-Con/crypto-algorithms. There doesn't appear to be an explicit LICENSE file, but the README describes a permissive policy. |
||
|
|
||
| function(define_test test_name library) | ||
| add_executable(${test_name} test/${test_name}.cc ${ARGN}) | ||
| add_dependencies(${test_name} hiredis flatbuffers_ep) | ||
| target_link_libraries(${test_name} common ${FLATBUFFERS_STATIC_LIB} ${library}) | ||
| target_compile_options(${test_name} PUBLIC "-DPLASMA_TEST -DLOCAL_SCHEDULER_TEST -DCOMMON_TEST -DRAY_COMMON_LOG_LEVEL=4 -DRAY_TIMEOUT=50") | ||
| endfunction() | ||
|
|
||
| define_test(common_tests "") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| #include "common.h" | ||
|
|
||
| #include <stdio.h> | ||
| #include <unistd.h> | ||
| #include <sys/types.h> | ||
| #include <sys/stat.h> | ||
| #include <fcntl.h> | ||
|
|
||
| #include "io.h" | ||
|
|
||
| /* This is used to define the array of object IDs. */ | ||
| const UT_icd object_id_icd = {sizeof(ObjectID), NULL, NULL, NULL}; | ||
|
|
||
| const UniqueID NIL_ID = {{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, | ||
| 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}}; | ||
|
|
||
| const unsigned char NIL_DIGEST[DIGEST_SIZE] = {0}; | ||
|
|
||
| UniqueID globally_unique_id(void) { | ||
| /* Use /dev/urandom for "real" randomness. */ | ||
| int fd; | ||
| int const flags = 0 /* for Windows compatibility */; | ||
| if ((fd = open("/dev/urandom", O_RDONLY, flags)) == -1) { | ||
| ARROW_LOG(ERROR) << "Could not generate random number"; | ||
| } | ||
| UniqueID result; | ||
| if(read_bytes(fd, &result.id[0], UNIQUE_ID_SIZE) < 0) { | ||
| ARROW_LOG(FATAL) << "read_bytes failed"; | ||
| } | ||
| close(fd); | ||
| return result; | ||
| } | ||
|
|
||
| bool ObjectID_equal(ObjectID first_id, ObjectID second_id) { | ||
| return UNIQUE_ID_EQ(first_id, second_id); | ||
| } | ||
|
|
||
| bool ObjectID_is_nil(ObjectID id) { | ||
| return ObjectID_equal(id, NIL_OBJECT_ID); | ||
| } | ||
|
|
||
| char *ObjectID_to_string(ObjectID obj_id, char *id_string, int id_length) { | ||
| DCHECK(id_length >= ID_STRING_SIZE); | ||
| static const char hex[] = "0123456789abcdef"; | ||
| char *buf = id_string; | ||
|
|
||
| for (int i = 0; i < UNIQUE_ID_SIZE; i++) { | ||
| unsigned int val = obj_id.id[i]; | ||
| *buf++ = hex[val >> 4]; | ||
| *buf++ = hex[val & 0xf]; | ||
| } | ||
| *buf = '\0'; | ||
|
|
||
| return id_string; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| #ifndef COMMON_H | ||
| #define COMMON_H | ||
|
|
||
| #include <stdbool.h> | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <string.h> | ||
| #include <errno.h> | ||
| #include <inttypes.h> | ||
| #ifndef _WIN32 | ||
| #include <execinfo.h> | ||
| #endif | ||
|
|
||
| #include "utarray.h" | ||
| #ifdef __cplusplus | ||
| extern "C" { | ||
| #endif | ||
| #include "sha256.h" | ||
| #ifdef __cplusplus | ||
| } | ||
| #endif | ||
|
|
||
| #include "arrow/util/logging.h" | ||
|
|
||
| /** Assertion definitions, with optional logging. */ | ||
|
|
||
| #define CHECK_MSG(COND, M, ...) \ | ||
| if (!(COND)) { \ | ||
| ARROW_LOG(FATAL) << "Check failure: " << M << " " << #COND " " ##__VA_ARGS__; \ | ||
| } | ||
|
|
||
| #define CHECK(COND) CHECK_MSG(COND, "") | ||
|
|
||
| /* Arrow defines the same macro, only define it if it has not already been | ||
| * defined. */ | ||
| #ifndef UNUSED | ||
| #define UNUSED(x) ((void) (x)) | ||
| #endif | ||
|
|
||
| /* These are exit codes for common errors that can occur in Ray components. */ | ||
| #define EXIT_COULD_NOT_BIND_PORT -2 | ||
|
|
||
| /** This macro indicates that this pointer owns the data it is pointing to | ||
| * and is responsible for freeing it. */ | ||
| #define OWNER | ||
|
|
||
| /** Definitions for unique ID types. */ | ||
| #define UNIQUE_ID_SIZE 20 | ||
|
|
||
| #define UNIQUE_ID_EQ(id1, id2) (memcmp((id1).id, (id2).id, UNIQUE_ID_SIZE) == 0) | ||
|
|
||
| #define IS_NIL_ID(id) UNIQUE_ID_EQ(id, NIL_ID) | ||
|
|
||
| typedef struct { unsigned char id[UNIQUE_ID_SIZE]; } UniqueID; | ||
|
|
||
| extern const UT_icd object_id_icd; | ||
|
|
||
| extern const UniqueID NIL_ID; | ||
|
|
||
| /* Generate a globally unique ID. */ | ||
| UniqueID globally_unique_id(void); | ||
|
|
||
| #define NIL_OBJECT_ID NIL_ID | ||
|
|
||
| typedef UniqueID ObjectID; | ||
|
|
||
| #define ID_STRING_SIZE (2 * UNIQUE_ID_SIZE + 1) | ||
|
|
||
| /** | ||
| * Convert an object ID to a hexdecimal string. This function assumes that | ||
| * buffer points to an already allocated char array of size ID_STRING_SIZE. And | ||
| * it writes a null-terminated hex-formatted string to id_string. | ||
| * | ||
| * @param obj_id The object ID to convert to a string. | ||
| * @param id_string A buffer to write the string to. It is assumed that this is | ||
| * managed by the caller and is sufficiently long to store the object ID | ||
| * string. | ||
| * @param id_length The length of the id_string buffer. | ||
| */ | ||
| char *ObjectID_to_string(ObjectID obj_id, char *id_string, int id_length); | ||
|
|
||
| /** | ||
| * Compare two object IDs. | ||
| * | ||
| * @param first_id The first object ID to compare. | ||
| * @param second_id The first object ID to compare. | ||
| * @return True if the object IDs are the same and false otherwise. | ||
| */ | ||
| bool ObjectID_equal(ObjectID first_id, ObjectID second_id); | ||
|
|
||
| /** | ||
| * Compare a object ID to the nil ID. | ||
| * | ||
| * @param id The object ID to compare to nil. | ||
| * @return True if the object ID is equal to nil. | ||
| */ | ||
| bool ObjectID_is_nil(ObjectID id); | ||
|
|
||
| /** Definitions for computing hash digests. */ | ||
| #define DIGEST_SIZE SHA256_BLOCK_SIZE | ||
|
|
||
| extern const unsigned char NIL_DIGEST[DIGEST_SIZE]; | ||
|
|
||
| #endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FYI, we're pulling in our own copy of flatbuffers here.