From 3eaf5484b903e16dff7deba2a815148bb3b10f3a Mon Sep 17 00:00:00 2001 From: Luiz Silveira Date: Wed, 12 Sep 2018 20:52:37 -0300 Subject: [PATCH 01/10] fix of some out of array bounds and potential buffer overflow erros --- src/lib/mdbm.c | 13 ++++++++++--- src/lib/mdbm_lock.cc | 7 ++++--- src/lib/multi_lock.cc | 6 +++--- src/tools/mash.cc | 2 +- src/tools/mdbm_stat.c | 2 +- 5 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/lib/mdbm.c b/src/lib/mdbm.c index 0a9ca2a..ade4ac4 100644 --- a/src/lib/mdbm.c +++ b/src/lib/mdbm.c @@ -4162,12 +4162,19 @@ mdbm_open_inner(const char *filename, int flags, int mode, int pagesize, int dbs db->db_sys_pagesize = db_sys_pagesize; /*init_locks(db); */ - db->db_filename[0] = 0; +// Code candidate for removal +/* db->db_filename[0] = 0; if (filename[0] == '/') { - strncat(db->db_filename,filename,sizeof(db->db_filename)-1); + int db_filename_len = strlen(db->db_filename); + strncat(db->db_filename, filename, MAXPATHLEN-db_filename_len); } else { - strncat(db->db_filename,pathname,sizeof(db->db_filename)-1); + int db_filename_len = strlen(db->db_filename); + strncat(db->db_filename, pathname, MAXPATHLEN-db_filename_len); } +*/ +// replacement code + strcpy(db->db_filename, pathname); + db->m_stat_cb = 0; db->m_stat_cb_flags = 0; db->m_stat_cb_user = 0; diff --git a/src/lib/mdbm_lock.cc b/src/lib/mdbm_lock.cc index d027e38..d61dd58 100644 --- a/src/lib/mdbm_lock.cc +++ b/src/lib/mdbm_lock.cc @@ -69,7 +69,8 @@ static void delete_helper(const char* name, int &ret, int &er) { int do_delete_lockfiles(const char* dbname) { int ret = 0, errcode=0; char realname[MAXPATHLEN+1]; - char fn[MAXPATHLEN+1]; + const char *lockfile_name_template = "/tmp/.mlock-named/%s._int_"; + char fn[MAXPATHLEN+1+25/*sizeof(lockfile_name_template)*/]; if (dbname[0] == '/') { if (realpath(dbname, realname) == NULL) { @@ -97,7 +98,7 @@ int do_delete_lockfiles(const char* dbname) { realname[MAXPATHLEN] = '\0'; } - 
snprintf(fn,sizeof(fn),"/tmp/.mlock-named/%s._int_", realname); + snprintf(fn, sizeof(fn), lockfile_name_template, realname); delete_helper(fn, ret, errcode); errno=errcode; @@ -195,7 +196,7 @@ lock_error(MDBM* db, const char* what, ...) { MdbmLockBase* locks = CAST_LOCKS(db); int err = errno; - const char* fname = db ? db->db_filename : NULL; + const char* fname = db ? db->db_filename : "NULL"; int flen = fname ? strlen(fname) : 0; int len = strlen(what) + flen + 4; va_list args; diff --git a/src/lib/multi_lock.cc b/src/lib/multi_lock.cc index 98cde6d..c0bfca8 100644 --- a/src/lib/multi_lock.cc +++ b/src/lib/multi_lock.cc @@ -1754,9 +1754,9 @@ int PthrLock::getFilename(const char* dbname, char* lockname, int maxlen) { if (maxlen < llen) { return -llen; } - strncpy(lockname, prefix, plen); - strncpy(lockname+plen, dbname, dblen); - strncpy(lockname+plen+dblen, suffix, slen); + strncpy(lockname, prefix, llen); + strncpy(lockname+plen, dbname, llen-plen); + strncpy(lockname+plen+dblen, suffix, llen-plen-dblen); lockname[llen-1] = 0; /* trailing null */ return llen; } diff --git a/src/tools/mash.cc b/src/tools/mash.cc index 37dc8a0..b74b926 100644 --- a/src/tools/mash.cc +++ b/src/tools/mash.cc @@ -1179,7 +1179,7 @@ class CatCommand : public MashCommand bytes = bufLen; truncated = true; } - buf[bytes]='\0'; + buf[bytes-1]='\0'; fprintf(OutputFilePtr, "%s\n", buf); if (truncated) { fprintf(OutputFilePtr, "\n"); diff --git a/src/tools/mdbm_stat.c b/src/tools/mdbm_stat.c index 6bc53c9..6dd2b72 100644 --- a/src/tools/mdbm_stat.c +++ b/src/tools/mdbm_stat.c @@ -553,7 +553,7 @@ main(int argc, char** argv) if (header) { char flags_buf[128]; - int left = sizeof(flags_buf); + int left = sizeof(flags_buf)-1; const char* magic; int align = hdr->h_dbflags & MDBM_ALIGN_MASK; time_t fetch_last_val = 0; From 4a330bdd5e48de61d0e409382eb38f89c72e929a Mon Sep 17 00:00:00 2001 From: Luiz Silveira Date: Wed, 12 Sep 2018 20:54:20 -0300 Subject: [PATCH 02/10] added a little flexibility 
when building outsite of redhat -- and now, we can also build in ArchLinux --- src/java/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/java/Makefile b/src/java/Makefile index eddf80b..7de77cc 100644 --- a/src/java/Makefile +++ b/src/java/Makefile @@ -15,8 +15,12 @@ SONAME=-Wl,-soname,$(LIBNAME).$(LIBVER) #-Wl,-rpath,$(DEFAULT_LIB_INSTALL_PATH) # TODO: Fix this to not be rh specific -INCDIR += -I/usr/lib/jvm/java/include -I/usr/lib/jvm/java/include/linux -LIB_BUILD_FLAGS += -L/usr/lib/jvm/java/jre/lib/amd64/ -ljsig $(LIBRT) -L$(TOPDIR)/src/lib/$(OBJDIR) -lmdbm -Wall -fno-strict-aliasing -Wno-unused-function -D_FILE_OFFSET_BITS=64 +#INCDIR += -I/usr/lib/jvm/java/include -I/usr/lib/jvm/java/include/linux +#JAVALIBDIR += -L/usr/lib/jvm/java/jre/lib/amd64/ +# IF in ArchLinux, use these lines instead: +INCDIR += -I/usr/lib/jvm/java-10-jdk/include/ -I/usr/lib/jvm/java-10-jdk/include/linux +JAVALIBDIR += -L/usr/lib/jvm/java-10-jdk/lib/ +LIB_BUILD_FLAGS += $(JAVALIBDIR) -ljsig $(LIBRT) -L$(TOPDIR)/src/lib/$(OBJDIR) -lmdbm -Wall -fno-strict-aliasing -Wno-unused-function -D_FILE_OFFSET_BITS=64 LIB_DEST=$(PREFIX)/lib$(ARCH_SUFFIX) ifeq ($(SET_RPATH),1) @@ -43,4 +47,4 @@ install:: default-make-target clean :: clean-objs maven: - LD_LIBRARY_PATH=../lib/object/ mvn -B clean verify -Djava.awt.headless=true -DfailIfNoTests=false -DnativeDir=../lib/object/ -DlibDir=object/ + LD_LIBRARY_PATH=../lib/object/ mvn -B clean verify -Dosgi.requiredJavaVersion=1.8 -Djava.awt.headless=true -DfailIfNoTests=false -DnativeDir=../lib/object/ -DlibDir=object/ From 8317a93c2829795b105a9ae592a43ea9c40161d0 Mon Sep 17 00:00:00 2001 From: Luiz Silveira Date: Wed, 12 Sep 2018 20:54:48 -0300 Subject: [PATCH 03/10] now supports java 10 --- src/java/pom.xml | 2 +- .../com/yahoo/db/mdbm/internal/DeallocatingClosedBase.java | 5 +++-- .../yahoo/db/mdbm/internal/NativeMdbmPoolImplementation.java | 3 --- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git 
a/src/java/pom.xml b/src/java/pom.xml index 7095191..c29a223 100644 --- a/src/java/pom.xml +++ b/src/java/pom.xml @@ -18,7 +18,7 @@ 3.1 1.3.3 1.7 - 1.9 + 11 ${min_jdk} ${max_jdk} ${min_jdk_version} diff --git a/src/java/src/main/java/com/yahoo/db/mdbm/internal/DeallocatingClosedBase.java b/src/java/src/main/java/com/yahoo/db/mdbm/internal/DeallocatingClosedBase.java index a7f5487..971d1fd 100644 --- a/src/java/src/main/java/com/yahoo/db/mdbm/internal/DeallocatingClosedBase.java +++ b/src/java/src/main/java/com/yahoo/db/mdbm/internal/DeallocatingClosedBase.java @@ -19,7 +19,7 @@ public abstract class DeallocatingClosedBase extends ClosedBaseChecked { protected volatile long pointer = 0L; protected volatile Deallocator deallocator; @SuppressWarnings("restriction") - protected volatile sun.misc.Cleaner cleaner; + protected volatile java.lang.ref.Cleaner cleaner; @SuppressWarnings("restriction") protected DeallocatingClosedBase(long pointer, Dealloc destructor) { @@ -27,7 +27,8 @@ protected DeallocatingClosedBase(long pointer, Dealloc destructor) { this.pointer = pointer; if (null != destructor) { this.deallocator = new Deallocator(pointer, destructor); - this.cleaner = sun.misc.Cleaner.create(this, deallocator); + this.cleaner = java.lang.ref.Cleaner.create(); + this.cleaner.register(this, deallocator); } else { this.deallocator = null; this.cleaner = null; diff --git a/src/java/src/main/java/com/yahoo/db/mdbm/internal/NativeMdbmPoolImplementation.java b/src/java/src/main/java/com/yahoo/db/mdbm/internal/NativeMdbmPoolImplementation.java index 4e3ff85..dd4c7a6 100644 --- a/src/java/src/main/java/com/yahoo/db/mdbm/internal/NativeMdbmPoolImplementation.java +++ b/src/java/src/main/java/com/yahoo/db/mdbm/internal/NativeMdbmPoolImplementation.java @@ -2,9 +2,6 @@ /* Licensed under the terms of the 3-Clause BSD license. See LICENSE file in the project root for details. 
*/ package com.yahoo.db.mdbm.internal; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; import java.util.concurrent.atomic.AtomicInteger; import com.yahoo.db.mdbm.MdbmInterface; From 1350bb4afa0a77d75b92a7f050e34ea5d004792b Mon Sep 17 00:00:00 2001 From: Luiz Silveira Date: Wed, 12 Sep 2018 20:55:34 -0300 Subject: [PATCH 04/10] fixed a relative path error --- src/test/func-test/TestBase.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/func-test/TestBase.cc b/src/test/func-test/TestBase.cc index 6b3f4b2..406f47a 100644 --- a/src/test/func-test/TestBase.cc +++ b/src/test/func-test/TestBase.cc @@ -33,7 +33,7 @@ std::string getMdbmToolPath(const std::string& tool_name) { // TODO TODO TODO test for existence and complain if non-existent char* plat = getenv("OBJDIR"); if (!plat) { plat = (char*)"object"; } - string path = string("../../tools/") + plat +string("/")+tool_name; + string path = string("../../../tools/") + plat +string("/")+tool_name; return path; } From f87bd77b979e8043243ca24dc5f85a388ca0d8aa Mon Sep 17 00:00:00 2001 From: timrc Date: Mon, 8 Dec 2014 15:48:52 -0800 Subject: [PATCH 05/10] Add blog entry. --- extra/EngBlog.md | 255 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100644 extra/EngBlog.md diff --git a/extra/EngBlog.md b/extra/EngBlog.md new file mode 100644 index 0000000..3813afc --- /dev/null +++ b/extra/EngBlog.md @@ -0,0 +1,255 @@ +### MDBM + + +#### Introduction + +Back in 1979, AT&T released a lightweight database engine written by Ken Thompson, +called DBM (http://en.wikipedia.org/wiki/Dbm). +In 1987 Ozan Yigit created a work-alike version, SDBM, that he released to the +public domain. + +The DBM family of databases has been quietly powering lots of things "under the +hood" on various versions of unix. I first encountered it rebuilding sendmail +rulesets on an early version of linux. 
+ +A group of programmers at SGI, including Larry \McVoy, wrote a version based on SDBM, +called MDBM, with the twist that it memory-mapped the data. + +This is how MDBM came to Yahoo, over a decade ago, where it has also been quietly +powering lots of things "under the hood". We've been tinkering with it since that +time, improving performance for some of our particular use cases, and adding *lots* +of features (some might say too many). We've also added extensive documentation and +tests. + + +And I'm proud to say that Yahoo! has released our version back into the wild. + + - Source code:
+ - Documentation:
+ - User Group:
+ + + +#### "Who did what now?..." + +These days, all the cool kids are saying "NoSQL", and "Zero Copy", for high performance, +but MDBM has been living it for well over a decade. Let's talk about what they mean, +how they are achieved, and why you should care. + +The exact definition of "NoSQL" has gotten a bit muddy these days, now including +"not-only-SQL". But at its core, it means optimizing the structure and interface +to your DB to maximize performance for your particular application. + +There are a number of things that SQL databases can do that MDBM can not. +MDBM is a simple key-value store. You can search for a key, and it will return references +to the associated value(s). You can store, overwrite, or append a value to a given key. +The interface is minimal. You can iterate over the database, but there are no "joins", +"views" or "select" clauses, nor any relationship between tables or entities unless +you explicitly create them. + +So, if MDBM doesn't have any of these features, why would you want to use it? + + 1. simplicity + 2. raw performance + + + +#### "Keep it simple..." + +The API has a lot of features, but using the main functionality is very simple. +Here's a quick example in C: + + +``` + datum key = { keyString, strlen(keyString) }; + datum value = { valueString, strlen(valueString) }; + datum found; + /* open a database, creating it if needed */ + MDBM *db = mdbm_open(filename, MDBM_O_RDWR|MDBM_O_CREAT, 0644, 0, 0); + /* store the value */ + mdbm_store(db, key, value, MDBM_REPLACE); + ... + /* fetch the value */ + mdbm_lock_smart (db, key, 0); + found = mdbm_fetch(db, key); + use_value(found); + mdbm_unlock_smart (db, key, 0); + ... + /* close the database */ + mdbm_close(db); +``` + +Additionally, fully functional databases can be less than 1k in size. They can also +be many terabytes in size (though that's not very practical yet on current hardware). +However, we do have DBs that are 10s of Gigabytes in common use, in production.
+ + + +#### Speed... it really is screaming fast; + +On hardware that was current several years ago, MDBM performed 15 million QPS for +read/write locking, and almost 10 million QPS for partitioned locking. +Both with latencies well under 5 microseconds. + +Here's some performance comparison data vs some other \NoSQL databases from a couple years ago: + + Performance: (based on \LevelDB benchmarks) + Machine: 8 Core Intel(R) Xeon(R) CPU L5420 @ 2.50GHz + +| *Test* | *MDBM* | *LevelDB* | *KyotoCabinet* | *BerkeleyDB* | +|:----------------|------------:|-------------:|---------------:|-------------:| +| Write Time | 1.1 μs | 4.5 μs | 5.1 μs | 14.0 μs | +| Read Time | 0.45 μs | 5.3 μs | 4.9 μs | 8.4 μs | +| Sequential Read | 0.05 μs | 0.53 μs | 1.71 μs | 39.1 μs | +| Sync Write | 2625 μs | 34944 μs | 177169 μs | 13001 μs | +[Performance Comparison] + + NOTES: + These are single-process, single-thread timings. + LevelDB does not support multi-process usage, and many features must be + lock-protected externally. + MDBM iteration (sequential read) is un-ordered. + Minimal tuning was performed on all of the candidates. + + +How does MDBM achieve this performance? There are two important components. + + 1. "Memory Mapping" - It leverages the kernel's virtual-memory system, + so that most operations can happen in-memory. + 2. "Zero-Copy" - The library provides raw pointers to data stored in the MDBM. + This requires some care (valgrind is your friend), but if you need the + performance, it's worth it. + If you want to trade the performance for safety, it's easy to do that too. + + +#### Memory Mapping - "It's all in your head" + +Behind the scenes, Linux (and many other operating systems) keep often used parts of files +in-memory via the virtual-memory subsystem. As different disk pages are needed, memory pages +will be written out to disk (if they've changed) and discarded. Then the needed pages are +read in to memory.
MDBM leverages this system by explicitly telling the VM system to load +(memory-map) the database file. As pages are modified, they are written out to disk, but +writes can be delayed and bunched up until some threshold is reached, or the pages are +needed for something else. + +This means less wear-and-tear on your spinning-rust or solid-state disks, but it also +makes a huge difference in performance. Disks are perhaps an order-of-magnitude (10x) +slower than memory for sequential access (reading from beginning to end, or always +appending to the end of a file). However, for random access (what most DBs need), +disks can be 5 orders-of-magnitude (100,000 times) slower than memory. +Solid state disks fare a bit better, but there's still a huge gap. + +If there is a lot of memory pressure, you can "pin" the MDBM pages so that the VM system +will keep them in memory. Or, you could let the VM page parts in and out, with some +performance hit. But what if your dataset is bigger than your available memory? +Out of the box, MDBM can run with two (or more) levels, so you can have a "cache" MDBM +that keeps frequently used items together in memory, and lets less used entries stay +on-disk. You can also use "windowed-mode" where MDBM explicitly manages mapping portions +in and out of memory itself (with some performance penalty). + +#### "Zero-Copy" - "Saved by Zero" + +Let's look at what used to be involved in sending a message out over the network: +a) user assembles pieces of the message into one big buffer in memory (1st copy) +b) user calls network function +c) transition to kernel code +d) kernel copies user data to kernel memory (second copy) +e) kernel notifies device driver +f) driver copies data to device (third copy) +g) transition back to user space + +Each one of these copies (and transitions) has a very noticeable performance cost.
+The linux kernel team spent a good amount of time and effort reducing this to: +a) user gives list of pieces to kernel (no copy) +b) transition to kernel code +c) kernel sets up DMA (direct-memory-access) for network card to read and send pieces +d) transition back to user space + +If you're connecting to a remote SQL DB over the network, you're incurring these costs for +the request and the response on both sides. If you're connecting to a local service, then +you can replace the driver section with a copy to userspace for the DB server. +(This completely ignores network/loopback latency, and any disk writes for the server.) + +For something like \LevelDB, you still have to wait to copy data for the kernel, and +DMA it to the disk. (LevelDB appends new entries to a "log" file, and squashes the +various log files together as another pass over the data.) + +For an MDBM in steady state, you can do a normal store with the cost of one memory copy. +To avoid that extra copy, you can reserve space for a value, and update it in-place. +The data will be written out to disk eventually by the VM system, but you don't have to +wait for it. NOTE: you can explicitly flush the data to disk, but for highest performance, +you should let the VM batch up changes and flush them when there is spare I/O and +cycles available. + +Because the data is explicitly mapped into memory, once you know the location +of a bit of data, you can treat it like any other bit of memory on the stack +or the heap. i.e. you can do something like: +``` + /* fetch a value */ + mdbm_lock_smart (db, key, 0); + found = mdbm_fetch(db, key); + /* increment the entry in-place */ + *(int*)found.dptr += 1; + mdbm_unlock_smart (db, key, 0); +``` + +#### Data Distribution - "It's bigger on the inside..." + +MDBM allows you to use various hashing functions on a file-by-file basis, including FNV, +Jenkins, and MD5. So, you can usually find a decent page distribution for your data.
+However, it's hard to escape statistics, so you will end up with pages that have +higher and lower occupancy than other pages. Also, if your values are not uniformly-sized, +then you may have some individual DB entries that vary wildly from the average. +These factors can all conspire to reduce the space efficiency of your DB. + +MDBM has several ways to cope with this: + + 1. It can split individual pages in two, using a form of [Extendible Hashing](http://en.wikipedia.org/wiki/Extendible_hashing). + 2. It has a feature called "overflow pages" that allows some pages to be larger than others. + 3. It has a feature called "large objects" that allows very big single DB entries, which are over a (configurable) size, to be placed in a special area in the DB, outside of the normal pages. + + +#### "With great power comes great responsibility..." + +This all sounds great, but there are some costs of which you should be aware. + +On clean shutdown of the machine, all of the MDBM data will be flushed to disk. +However, in cases like power-failure and hardware problems, it's possible for +data to be lost, and the resulting DB to be corrupted. MDBM includes a tool to +check DB consistency. However, you should always have contingencies. +One way or another this is some form of redundancy... + +At Yahoo!, MDBM use typically falls into a few categories: + + 1. The DBs are cached data. So the DB can be truncated/deleted and will fill with appropriate data over time. + 2. The DBs are generated in bulk somewhere (e.g. Hadoop grid), and copied to where they are used. They can be re-copied from a source or peer. If they are read-only during use, then corruption is not an issue. + 3. The data represents transient data (monitoring), for which its loss is less critical. + 4. The data needs to persist and is dynamically generated. We typically have some combination of redundancy across machines/data-centers, and logging the data to another channel.
In case of damage, data can be copied from a peer, or re-generated from the logged data. + + +There is one other cost. Because MDBM gives you raw pointers into the DB's +data, you have to be very careful about making sure you don't have array over-runs, +invalid pointer access, or the like. Unit tests and tools like valgrind are a +great help in preventing issues. (You do have unit tests, right?) + +If you do run into a problem, MDBM does provide "protected mode", where pages +of the DB individually become writable only as needed. However, this comes +at a noticeable performance cost, so it isn't used in normal production. + +You shouldn't let the preceding costs scare you away, just be aware that +some care is required. Redundancy is always your friend. + +Yahoo has been using MDBM in production for over a decade, for things both +small (a few KB) and large (10s of GB). +One recent project has DBs ranging from 5MB to 10GB spread across 1500 DBs +(not counting replicas) for a total dataset size of 4 Terabytes. + +When I first encountered MDBM, we had scaled out what was one of the largest +Oracle instances (at the time) in about every direction it could be scaled. +Unfortunately, the serving side was having trouble expanding enough to meet +latency requirements. The solution was a tier of partitioned (aka "sharded"), +replicated, distributed copies of the data in MDBMs. + +If it looks like it might be a fit for your application, take it out for a +spin, and let us know how it works for you. + From 0cabd611492a1c19b6570116fa16288812063d92 Mon Sep 17 00:00:00 2001 From: timrc-git Date: Wed, 9 Aug 2017 13:00:50 -0700 Subject: [PATCH 06/10] Fix off-by-one in mash.
--- src/tools/mash.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/mash.cc b/src/tools/mash.cc index b74b926..ffba46d 100644 --- a/src/tools/mash.cc +++ b/src/tools/mash.cc @@ -1163,7 +1163,7 @@ class CatCommand : public MashCommand string arg = onearg; Utils.FinalizePath(arg); const int bufLen = 65536; - char buf[bufLen]; + char buf[bufLen+1]; int fd = open(arg.c_str(), O_RDONLY); if (fd < 0) { fprintf(OutputFilePtr, "Cat could not open [%s], %s\n", arg.c_str(), strerror(errno)); From d0fe37fa4ccbcc1cb1c11c4240918c3599897682 Mon Sep 17 00:00:00 2001 From: timrc-git Date: Wed, 13 Jun 2018 09:00:05 -0700 Subject: [PATCH 07/10] Fix mdbm_reset_all_locks. --- gendoc/README | 4 ++++ src/scripts/mdbm_reset_all_locks | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/gendoc/README b/gendoc/README index 05c0bc8..fb612ad 100644 --- a/gendoc/README +++ b/gendoc/README @@ -31,6 +31,10 @@ print < Date: Wed, 13 Jun 2018 09:26:37 -0700 Subject: [PATCH 08/10] Rename partition-lock define. 
--- src/lib/mdbm_lock.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/mdbm_lock.cc b/src/lib/mdbm_lock.cc index d61dd58..b425283 100644 --- a/src/lib/mdbm_lock.cc +++ b/src/lib/mdbm_lock.cc @@ -359,7 +359,7 @@ struct mdbm_locks* open_locks_inner(const char* dbname, int flags, int do_lock, count = get_cpu_count() * PARTITION_LOCK_CPU_MULTIPLIER; #else count = 128; -#endif // PARTITION_LOCK_CPU_COUNT +#endif // PARTITION_LOCK_COUNT } if (locks->open(dbname, flags, type, count, do_lock, need_check)) { delete locks; From 2710f01c041c4a93e62e8308eff2681d2017eb76 Mon Sep 17 00:00:00 2001 From: Luiz Silveira Date: Wed, 12 Sep 2018 21:29:52 -0300 Subject: [PATCH 09/10] merge of two independent fixes -- in favor of full page reads --- src/tools/mash.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tools/mash.cc b/src/tools/mash.cc index ffba46d..e35abde 100644 --- a/src/tools/mash.cc +++ b/src/tools/mash.cc @@ -1169,7 +1169,7 @@ class CatCommand : public MashCommand fprintf(OutputFilePtr, "Cat could not open [%s], %s\n", arg.c_str(), strerror(errno)); return (1); } - int bytes = pread(fd, buf, bufLen-1, 0); + int bytes = pread(fd, buf, bufLen, 0); fprintf(OutputFilePtr, "[%s] %d bytes: \n", arg.c_str(), bytes); if (bytes<=0) { fprintf(OutputFilePtr, "\n"); @@ -1179,7 +1179,7 @@ class CatCommand : public MashCommand bytes = bufLen; truncated = true; } - buf[bytes-1]='\0'; + buf[bytes]='\0'; fprintf(OutputFilePtr, "%s\n", buf); if (truncated) { fprintf(OutputFilePtr, "\n"); From 77f04001ba7dea9165eed73e62f7d462c1659311 Mon Sep 17 00:00:00 2001 From: Luiz Silveira Date: Thu, 13 Sep 2018 15:39:02 -0300 Subject: [PATCH 10/10] now supports ARMv7a --- src/lib/atomic.h | 8 +++++++- src/lib/log.c | 13 ++++++++----- src/lib/mdbm.c | 4 ++++ src/test/unit-test/test_dmbase.cc | 5 +++++ 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/lib/atomic.h b/src/lib/atomic.h index 48500d5..b346632 100644 --- 
a/src/lib/atomic.h +++ b/src/lib/atomic.h @@ -78,13 +78,19 @@ static inline void atomic_barrier() { static inline void atomic_read_barrier() { #ifdef __x86_64__ __asm__ __volatile__ ("lfence" : : : "memory"); -#else +#elif __ARM_ARCH_7A__ + __asm__ __volatile__ ("dmb"); +#else // X86 __asm__ __volatile__ ("lock addl $0,0(%%esp)" : : : "memory"); #endif } static inline void atomic_pause() { +#ifdef __ARM_ARCH_7A__ + __asm__ __volatile__ ("yield"); +#else // X86 & X86_64 __asm__ __volatile__ ("pause"); +#endif } diff --git a/src/lib/log.c b/src/lib/log.c index 80591e5..c4a1f41 100644 --- a/src/lib/log.c +++ b/src/lib/log.c @@ -127,7 +127,10 @@ int mdbm_log_vlogerror_at (const char* file, int line, int level, int error, con if (len < sizeof(buf)) { mdbm_strlcpy(buf+len,strerror(error),sizeof(buf)-len); } - return mdbm_log_vlog_at(file, line, level,buf,NULL); + { + va_list unused; + return mdbm_log_vlog_at(file, line, level,buf,unused); + } } @@ -170,11 +173,11 @@ int mdbm_log_vlog_at (const char* file, int line, int level, const char* format, offset += sizeof(FATAL)-1; } - if (args) { + //if (args) { vsnprintf(buf+offset,sizeof(buf)-offset-2,format,args); - } else { - mdbm_strlcpy(buf+offset,format,sizeof(buf)-offset-2); - } + //} else { + // mdbm_strlcpy(buf+offset,format,sizeof(buf)-offset-2); + //} buflen = strlen(buf); if (buf[buflen-1] != '\n') { diff --git a/src/lib/mdbm.c b/src/lib/mdbm.c index ade4ac4..ff9ef38 100644 --- a/src/lib/mdbm.c +++ b/src/lib/mdbm.c @@ -147,11 +147,15 @@ volatile static uint64_t tsc_per_usec; /* TSC clock cycles per microsecond */ * WANRING: This value may be affected by speedstep and may vary randomly across cores. 
*/ static inline uint64_t rdtsc(void) { +#ifdef __ARM_ARCH_7A__ + return 0ul; +#else uint32_t lo, hi; /* We cannot use "=A", since this would use %rax on x86_64 and * return only the lower 32bits of the TSC */ __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (((uint64_t)hi) << 32) | lo; +#endif } uint64_t diff --git a/src/test/unit-test/test_dmbase.cc b/src/test/unit-test/test_dmbase.cc index 86fb4dc..bbcf807 100644 --- a/src/test/unit-test/test_dmbase.cc +++ b/src/test/unit-test/test_dmbase.cc @@ -1950,8 +1950,13 @@ DataMgmtBaseTestSuite::FilledSinglePagedDbNonDefsAndShakeFuncPurgeG5() uint64_t size_post = mdbm_get_size(dbh); uint64_t count_post = mdbm_count_records(dbh); +#ifdef __ARM_ARCH_7A__ + fprintf(stderr, "MDBM SIZE went from %llu to %llu after %d entries (orig:%llu ent-prior:%llu post:%llu)\n", + size_prior, size_post, refillAddCnt, count_orig, count_prior, count_post); +#else fprintf(stderr, "MDBM SIZE went from %lu to %lu after %d entries (orig:%lu ent-prior:%lu post:%lu)\n", size_prior, size_post, refillAddCnt, count_orig, count_prior, count_post); +#endif // Should only be able to add twice the number of entries as first time // since verifyDefaultConfig performs mdbm_limit_size with twice the number of pages