db47: replace with db

There's no need to maintain an old version.

Removed all patches as they are all upstream.

Import new ones from Arch, Debian, and OpenEmbedded.

Signed-off-by: Rosen Penev <rosenp@gmail.com>
Rosen Penev
2024-08-18 11:41:26 -07:00
parent 903cad3153
commit e7badaa6cb
22 changed files with 2041 additions and 453 deletions


@@ -7,24 +7,20 @@
 include $(TOPDIR)/rules.mk
 
-BASE_VERSION:=4.7.25
+PKG_NAME:=db
+PKG_VERSION:=5.3.28
+PKG_RELEASE:=1
 
-PKG_NAME:=db47
-PKG_VERSION:=$(BASE_VERSION).4.NC
-PKG_RELEASE:=7
-PKG_BUILD_DIR:=$(BUILD_DIR)/db-$(BASE_VERSION).NC
-PKG_SOURCE:=db-$(BASE_VERSION).NC.tar.gz
-PKG_SOURCE_URL:=http://download.oracle.com/berkeley-db/
-PKG_HASH:=cd39c711023ff44c01d3c8ff0323eef7318660772b24f287556e6bf676a12535
+PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz
+PKG_SOURCE_URL:=https://download.oracle.com/berkeley-db/
+PKG_HASH:=e0a992d740709892e81f9d93f06daf305cf73fb81b545afe72478043172c3628
 
 PKG_MAINTAINER:=Marcel Denia <naoir@gmx.net>
 PKG_LICENSE:=Sleepycat
 PKG_LICENSE_FILES:=LICENSE
 
-PKG_BUILD_DEPENDS:=libxml2
+PKG_FIXUP:=autoreconf
 PKG_LIBTOOL_PATHS:=. build_unix
+PKG_BUILD_DEPENDS:=libxml2
 PKG_BUILD_PARALLEL:=1
 
 include $(INCLUDE_DIR)/package.mk
@@ -32,26 +28,28 @@ include $(INCLUDE_DIR)/package.mk
 define Package/libdb47
   SECTION:=libs
   CATEGORY:=Libraries
-  TITLE:=Berkeley DB library (4.7)
+  TITLE:=Berkeley DB library
   URL:=http://www.oracle.com/us/products/database/berkeley-db
   PROVIDES:=libdb47-full
+  ABI_VERSION:=5
 endef
 
 define Package/libdb47/description
- Berkeley DB library (4.7).
+ Berkeley DB library.
 endef
 
 define Package/libdb47xx
   SECTION:=libs
   CATEGORY:=Libraries
   DEPENDS:=+libdb47 +libstdcpp
-  TITLE:=Berkeley DB library (4.7) for C++
+  TITLE:=Berkeley DB library for C++
   URL:=http://www.oracle.com/us/products/database/berkeley-db
   PROVIDES:=libdb47xx-full
+  ABI_VERSION:=5
 endef
 
 define Package/libdb47xx/description
- Berkeley DB library (4.7). C++ wrapper.
+ Berkeley DB library C++ wrapper.
 endef
 
 CONFIGURE_PATH = build_unix
 
@@ -63,7 +61,6 @@ CONFIGURE_ARGS += \
 	--disable-java \
 	--with-mutex=POSIX/pthreads/library \
 	--disable-tcl \
-	--disable-rpc \
 	--enable-compat185 \
 	--disable-debug \
 	$(if $(CONFIG_PACKAGE_libdb47xx),--enable-cxx,--disable-cxx)


@@ -0,0 +1,19 @@
With higher parallelism it sometimes fails with:
libtool: link: `util_log.lo' is not a valid libtool object
make: *** [db_replicate] Error 1
Upstream-Status: Inappropriate [as far as open source community is concerned, upstream is dead]
Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com>
--- a/dist/Makefile.in
+++ b/dist/Makefile.in
@@ -1034,7 +1034,7 @@ db_recover: db_recover@o@ util_sig@o@ $(
db_recover@o@ util_sig@o@ $(DEF_LIB) $(LIBS)
$(POSTLINK) $@
-db_replicate: db_replicate@o@ util_sig@o@ $(DEF_LIB)
+db_replicate: db_replicate@o@ util_log@o@ util_sig@o@ $(DEF_LIB)
$(CCLINK) -o $@ $(LDFLAGS) \
db_replicate@o@ util_log@o@ util_sig@o@ $(DEF_LIB) $(LIBS)
$(POSTLINK) $@


@@ -0,0 +1,40 @@
From 29621d637e30982489693f2e207ce6a1790e3337 Mon Sep 17 00:00:00 2001
From: Khem Raj <raj.khem@gmail.com>
Date: Wed, 22 Mar 2017 15:32:26 +0000
Subject: [PATCH] atomic: Rename local __atomic_compare_exchange to avoid clash
with builtins
Helps building with clang
Fixes
../db-5.3.28/src/dbinc/atomic.h:179:19: error: definition of builtin function '__atomic_compare_exchange'
static inline int __atomic_compare_exchange(
Upstream-Status: Inappropriate [as far as open source community is concerned, upstream is dead]
Signed-off-by: Khem Raj <raj.khem@gmail.com>
---
src/dbinc/atomic.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/src/dbinc/atomic.h
+++ b/src/dbinc/atomic.h
@@ -144,7 +144,7 @@ typedef LONG volatile *interlocked_val;
#define atomic_inc(env, p) __atomic_inc(p)
#define atomic_dec(env, p) __atomic_dec(p)
#define atomic_compare_exchange(env, p, o, n) \
- __atomic_compare_exchange((p), (o), (n))
+ __db_atomic_compare_exchange((p), (o), (n))
static inline int __atomic_inc(db_atomic_t *p)
{
int temp;
@@ -176,7 +176,7 @@ static inline int __atomic_dec(db_atomic
* http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
* which configure could be changed to use.
*/
-static inline int __atomic_compare_exchange(
+static inline int __db_atomic_compare_exchange(
db_atomic_t *p, atomic_value_t oldval, atomic_value_t newval)
{
atomic_value_t was;
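
For background, GCC and clang reserve the __atomic_* namespace for their builtin atomics, which is why the definition above trips clang. A minimal standalone sketch (illustrative, not taken from the patch) of the before/after pattern:

#include <stdio.h>

typedef struct { volatile int value; } db_atomic_t;

/* Defining a function literally named __atomic_compare_exchange is
 * rejected by clang as a redefinition of a builtin; the __db_ prefix
 * avoids that while the macro keeps call sites unchanged. */
#define atomic_compare_exchange(p, o, n) \
	__db_atomic_compare_exchange((p), (o), (n))

static inline int
__db_atomic_compare_exchange(db_atomic_t *p, int oldval, int newval)
{
	/* Legacy GCC/clang builtin; returns nonzero on success. */
	return __sync_bool_compare_and_swap(&p->value, oldval, newval);
}

int main(void)
{
	db_atomic_t a = { 1 };
	printf("%d %d\n", atomic_compare_exchange(&a, 1, 2), a.value); /* 1 2 */
	return 0;
}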


@@ -0,0 +1,37 @@
From 32e5943a3c4637d39e4d65b544dcb99e280210e3 Mon Sep 17 00:00:00 2001
From: Khem Raj <raj.khem@gmail.com>
Date: Sun, 23 Jul 2017 10:54:26 -0700
Subject: [PATCH] configure: Add explicit tag options to libtool invocation
This helps cross-compiling when tag inference via heuristics
fails because the CC variable contains -fPIE -pie, which libtool
smartly removes when building libraries.
Upstream-Status: Inappropriate [as far as open source community is concerned, upstream is dead]
Signed-off-by: Khem Raj <raj.khem@gmail.com>
---
dist/configure.ac | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
--- a/dist/configure.ac
+++ b/dist/configure.ac
@@ -366,12 +366,12 @@ LIBTOOL="./libtool"
INSTALLER="\$(LIBTOOL) --mode=install cp -p"
-MAKEFILE_CC="\$(LIBTOOL) --mode=compile ${MAKEFILE_CC}"
-MAKEFILE_SOLINK="\$(LIBTOOL) --mode=link ${MAKEFILE_CCLINK} -avoid-version"
-MAKEFILE_CCLINK="\$(LIBTOOL) --mode=link ${MAKEFILE_CCLINK}"
-MAKEFILE_CXX="\$(LIBTOOL) --mode=compile ${MAKEFILE_CXX}"
-MAKEFILE_XSOLINK="\$(LIBTOOL) --mode=link ${MAKEFILE_CXXLINK} -avoid-version"
-MAKEFILE_CXXLINK="\$(LIBTOOL) --mode=link ${MAKEFILE_CXXLINK}"
+MAKEFILE_CC="\$(LIBTOOL) --tag=CC --mode=compile ${MAKEFILE_CC}"
+MAKEFILE_SOLINK="\$(LIBTOOL) --tag=CC --mode=link ${MAKEFILE_CCLINK} -avoid-version"
+MAKEFILE_CCLINK="\$(LIBTOOL) --tag=CC --mode=link ${MAKEFILE_CCLINK}"
+MAKEFILE_CXX="\$(LIBTOOL) --tag=CXX --mode=compile ${MAKEFILE_CXX}"
+MAKEFILE_XSOLINK="\$(LIBTOOL) --tag=CXX --mode=link ${MAKEFILE_CXXLINK} -avoid-version"
+MAKEFILE_CXXLINK="\$(LIBTOOL) --tag=CXX --mode=link ${MAKEFILE_CXXLINK}"
case "$host_os" in


@@ -0,0 +1,76 @@
configure wants to use host-specific types to get a 64-bit integer in db.h
instead of using an alias such as int64_t. This means that the header differs
in multilib environments for no good reason, so replace the type with the alias
in stdint.h.
This then breaks the overly complicated type check but as we know that int64_t
exists and works, we can just delete that.
Upstream-Status: Inappropriate [as far as open source community is concerned, upstream is dead]
Signed-off-by: Ross Burton <ross.burton@intel.com>
--- a/dist/aclocal/sequence.m4
+++ b/dist/aclocal/sequence.m4
@@ -21,14 +21,14 @@ AC_DEFUN(AM_SEQUENCE_CONFIGURE, [
db_cv_seq_type="no"
if test "$db_cv_build_sequence" = "yes" -a\
"$ac_cv_sizeof_long" -eq "8"; then
- db_cv_seq_type="long"
+ db_cv_seq_type="int64_t"
db_cv_seq_fmt='"%ld"'
db_cv_seq_ufmt='"%lu"'
INT64_FMT='#define INT64_FMT "%ld"'
UINT64_FMT='#define UINT64_FMT "%lu"'
else if test "$db_cv_build_sequence" = "yes" -a\
"$ac_cv_sizeof_long_long" -eq "8"; then
- db_cv_seq_type="long long"
+ db_cv_seq_type="int64_t"
db_cv_seq_fmt='"%lld"'
db_cv_seq_ufmt='"%llu"'
INT64_FMT='#define INT64_FMT "%lld"'
@@ -38,44 +38,7 @@ AC_DEFUN(AM_SEQUENCE_CONFIGURE, [
fi
fi
- # Test to see if we can declare variables of the appropriate size
- # and format them. If we're cross-compiling, all we get is a link
- # test, which won't test for the appropriate printf format strings.
- if test "$db_cv_build_sequence" = "yes"; then
- AC_TRY_RUN([
- main() {
- $db_cv_seq_type l;
- unsigned $db_cv_seq_type u;
- char buf@<:@100@:>@;
-
- buf@<:@0@:>@ = 'a';
- l = 9223372036854775807LL;
- (void)snprintf(buf, sizeof(buf), $db_cv_seq_fmt, l);
- if (strcmp(buf, "9223372036854775807"))
- return (1);
- u = 18446744073709551615ULL;
- (void)snprintf(buf, sizeof(buf), $db_cv_seq_ufmt, u);
- if (strcmp(buf, "18446744073709551615"))
- return (1);
- return (0);
- }],, [db_cv_build_sequence="no"],
- AC_TRY_LINK(,[
- $db_cv_seq_type l;
- unsigned $db_cv_seq_type u;
- char buf@<:@100@:>@;
-
- buf@<:@0@:>@ = 'a';
- l = 9223372036854775807LL;
- (void)snprintf(buf, sizeof(buf), $db_cv_seq_fmt, l);
- if (strcmp(buf, "9223372036854775807"))
- return (1);
- u = 18446744073709551615ULL;
- (void)snprintf(buf, sizeof(buf), $db_cv_seq_ufmt, u);
- if (strcmp(buf, "18446744073709551615"))
- return (1);
- return (0);
- ],, [db_cv_build_sequence="no"]))
- fi
+ db_cv_build_sequence="yes"
if test "$db_cv_build_sequence" = "yes"; then
AC_SUBST(db_seq_decl)
db_seq_decl="typedef $db_cv_seq_type db_seq_t;";
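
The net effect is that configure now emits the same typedef on every host, regardless of whether long or long long is the 8-byte type. A small sketch (assumed shape of the generated declaration, plus the portable format string that replaces the per-host "%ld"/"%lld" probing):

#include <inttypes.h>
#include <stdio.h>

typedef int64_t db_seq_t;	/* what the patched configure emits everywhere */

int main(void)
{
	db_seq_t l = INT64_MAX;
	printf("%" PRId64 "\n", l);	/* 9223372036854775807 on any ABI */
	return 0;
}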


@@ -0,0 +1,132 @@
From a3569f118fd95b7ad41e1a1128e17c0b8928556d Mon Sep 17 00:00:00 2001
From: Khem Raj <raj.khem@gmail.com>
Date: Sun, 20 Jan 2019 18:30:23 -0800
Subject: [PATCH] Fix libc++ compatibility by renaming atomic_init API
db5 does not build because it is redefining a C++11 standard
library identifier, atomic_init(). Therefore prefix all
its internal defines with '__db_', to avoid collisions.
Upstream-Status: Inappropriate [as far as open source community is concerned, upstream is dead]
Signed-off-by: Khem Raj <raj.khem@gmail.com>
---
src/dbinc/atomic.h | 4 ++--
src/mp/mp_fget.c | 4 ++--
src/mp/mp_mvcc.c | 4 ++--
src/mp/mp_region.c | 4 ++--
src/mutex/mut_method.c | 2 +-
src/mutex/mut_tas.c | 4 ++--
6 files changed, 11 insertions(+), 11 deletions(-)
--- a/src/dbinc/atomic.h
+++ b/src/dbinc/atomic.h
@@ -70,7 +70,7 @@ typedef struct {
* These have no memory barriers; the caller must include them when necessary.
*/
#define atomic_read(p) ((p)->value)
-#define atomic_init(p, val) ((p)->value = (val))
+#define __db_atomic_init(p, val) ((p)->value = (val))
#ifdef HAVE_ATOMIC_SUPPORT
@@ -206,7 +206,7 @@ static inline int __db_atomic_compare_ex
#define atomic_dec(env, p) (--(p)->value)
#define atomic_compare_exchange(env, p, oldval, newval) \
(DB_ASSERT(env, atomic_read(p) == (oldval)), \
- atomic_init(p, (newval)), 1)
+ __db_atomic_init(p, (newval)), 1)
#else
#define atomic_inc(env, p) __atomic_inc(env, p)
#define atomic_dec(env, p) __atomic_dec(env, p)
--- a/src/mp/mp_fget.c
+++ b/src/mp/mp_fget.c
@@ -649,7 +649,7 @@ alloc: /* Allocate a new buffer header
/* Initialize enough so we can call __memp_bhfree. */
alloc_bhp->flags = 0;
- atomic_init(&alloc_bhp->ref, 1);
+ __db_atomic_init(&alloc_bhp->ref, 1);
#ifdef DIAGNOSTIC
if ((uintptr_t)alloc_bhp->buf & (sizeof(size_t) - 1)) {
__db_errx(env, DB_STR("3025",
@@ -955,7 +955,7 @@ alloc: /* Allocate a new buffer header
MVCC_MPROTECT(bhp->buf, mfp->pagesize,
PROT_READ);
- atomic_init(&alloc_bhp->ref, 1);
+ __db_atomic_init(&alloc_bhp->ref, 1);
MUTEX_LOCK(env, alloc_bhp->mtx_buf);
alloc_bhp->priority = bhp->priority;
alloc_bhp->pgno = bhp->pgno;
--- a/src/mp/mp_mvcc.c
+++ b/src/mp/mp_mvcc.c
@@ -276,7 +276,7 @@ __memp_bh_freeze(dbmp, infop, hp, bhp, n
#else
memcpy(frozen_bhp, bhp, SSZA(BH, buf));
#endif
- atomic_init(&frozen_bhp->ref, 0);
+ __db_atomic_init(&frozen_bhp->ref, 0);
if (mutex != MUTEX_INVALID)
frozen_bhp->mtx_buf = mutex;
else if ((ret = __mutex_alloc(env, MTX_MPOOL_BH,
@@ -428,7 +428,7 @@ __memp_bh_thaw(dbmp, infop, hp, frozen_b
#endif
alloc_bhp->mtx_buf = mutex;
MUTEX_LOCK(env, alloc_bhp->mtx_buf);
- atomic_init(&alloc_bhp->ref, 1);
+ __db_atomic_init(&alloc_bhp->ref, 1);
F_CLR(alloc_bhp, BH_FROZEN);
}
--- a/src/mp/mp_region.c
+++ b/src/mp/mp_region.c
@@ -245,7 +245,7 @@ __memp_init(env, dbmp, reginfo_off, htab
MTX_MPOOL_FILE_BUCKET, 0, &htab[i].mtx_hash)) != 0)
return (ret);
SH_TAILQ_INIT(&htab[i].hash_bucket);
- atomic_init(&htab[i].hash_page_dirty, 0);
+ __db_atomic_init(&htab[i].hash_page_dirty, 0);
}
/*
@@ -302,7 +302,7 @@ no_prealloc:
} else
hp->mtx_hash = mtx_base + (i % dbenv->mp_mtxcount);
SH_TAILQ_INIT(&hp->hash_bucket);
- atomic_init(&hp->hash_page_dirty, 0);
+ __db_atomic_init(&hp->hash_page_dirty, 0);
#ifdef HAVE_STATISTICS
hp->hash_io_wait = 0;
hp->hash_frozen = hp->hash_thawed = hp->hash_frozen_freed = 0;
--- a/src/mutex/mut_method.c
+++ b/src/mutex/mut_method.c
@@ -474,7 +474,7 @@ atomic_compare_exchange(env, v, oldval,
MUTEX_LOCK(env, mtx);
ret = atomic_read(v) == oldval;
if (ret)
- atomic_init(v, newval);
+ __db_atomic_init(v, newval);
MUTEX_UNLOCK(env, mtx);
return (ret);
--- a/src/mutex/mut_tas.c
+++ b/src/mutex/mut_tas.c
@@ -47,7 +47,7 @@ __db_tas_mutex_init(env, mutex, flags)
#ifdef HAVE_SHARED_LATCHES
if (F_ISSET(mutexp, DB_MUTEX_SHARED))
- atomic_init(&mutexp->sharecount, 0);
+ __db_atomic_init(&mutexp->sharecount, 0);
else
#endif
if (MUTEX_INIT(&mutexp->tas)) {
@@ -536,7 +536,7 @@ __db_tas_mutex_unlock(env, mutex)
F_CLR(mutexp, DB_MUTEX_LOCKED);
/* Flush flag update before zeroing count */
MEMBAR_EXIT();
- atomic_init(&mutexp->sharecount, 0);
+ __db_atomic_init(&mutexp->sharecount, 0);
} else {
DB_ASSERT(env, sharecount > 0);
MEMBAR_EXIT();
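
The same name is claimed by C11 as well (<stdatomic.h> declares atomic_init), so the prefix fixes more than just libc++ builds. A short sketch (illustrative, not from the patch) showing the renamed macro coexisting with the standard identifier:

#include <stdatomic.h>
#include <stdio.h>

typedef struct { int value; } db_atomic_t;
#define __db_atomic_init(p, val) ((p)->value = (val))	/* renamed db macro */

int main(void)
{
	db_atomic_t a;
	atomic_int s;

	__db_atomic_init(&a, 7);	/* db's private initializer */
	atomic_init(&s, 7);		/* the standard one, untouched */
	printf("%d %d\n", a.value, atomic_load(&s));
	return 0;
}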


@@ -0,0 +1,45 @@
From 96b303caf70a7635953c36e5bfb9ad6e75cb7637 Mon Sep 17 00:00:00 2001
From: Khem Raj <raj.khem@gmail.com>
Date: Fri, 14 Feb 2020 14:12:59 -0800
Subject: [PATCH] clock: Do not define own timespec
timespec is provided by libc and is best left to libc.
os_gettime takes a db_timespec and passes its address to clock_gettime,
which assumes that db_timespec and timespec are the same, but db_timespec
is 12 bytes here while libc's timespec is 16 bytes.
This can cause problems, especially with 64-bit time_t.
Upstream-Status: Inappropriate [as far as open source community is concerned, upstream is dead]
Signed-off-by: Khem Raj <raj.khem@gmail.com>
---
src/dbinc/clock.h | 17 +----------------
1 file changed, 1 insertion(+), 16 deletions(-)
--- a/src/dbinc/clock.h
+++ b/src/dbinc/clock.h
@@ -44,22 +44,8 @@
extern "C" {
#endif
-/*
- * This declaration is POSIX-compatible. Because there are lots of different
- * time.h include file patterns out there, it's easier to declare our own name
- * in all cases than to try and discover if a system has a struct timespec.
- * For the same reason, and because we'd have to #include <sys/time.h> in db.h,
- * we don't export any timespec structures in the DB API, even in places where
- * it would make sense, like the replication statistics information.
- */
-typedef struct {
- time_t tv_sec; /* seconds */
-#ifdef HAVE_MIXED_SIZE_ADDRESSING
- int32_t tv_nsec;
-#else
- long tv_nsec; /* nanoseconds */
-#endif
-} db_timespec;
+#include <time.h>
+#define db_timespec struct timespec
/* Operations on timespecs */
#undef timespecclear
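
The mismatch is easy to demonstrate. A sketch (sizes are ABI-dependent; the 12-versus-16 case arises on 32-bit targets with 64-bit time_t, where the old struct packs to 12 bytes):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* The old private declaration, roughly as removed above. */
struct old_db_timespec {
	time_t tv_sec;
	int32_t tv_nsec;
};

int main(void)
{
	/* clock_gettime() writes a full struct timespec, so handing it a
	 * smaller old_db_timespec corrupts adjacent memory. */
	printf("old: %zu bytes, libc: %zu bytes\n",
	    sizeof(struct old_db_timespec), sizeof(struct timespec));
	return 0;
}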


@@ -0,0 +1,181 @@
--- a/src/mp/mp_stat.c
+++ b/src/mp/mp_stat.c
@@ -87,6 +87,13 @@ __memp_stat(env, gspp, fspp, flags)
u_int32_t i;
uintmax_t tmp_wait, tmp_nowait;
+ /*
+ * The array holding the lengths related to the buffer allocated for *fspp.
+ * The first element of the array holds the number of entries allocated.
+ * The second element of the array holds the total number of bytes allocated.
+ */
+ u_int32_t fsp_len[2];
+
dbmp = env->mp_handle;
mp = dbmp->reginfo[0].primary;
@@ -193,32 +200,53 @@ __memp_stat(env, gspp, fspp, flags)
if (fspp != NULL) {
*fspp = NULL;
- /* Count the MPOOLFILE structures. */
- i = 0;
- len = 0;
- if ((ret = __memp_walk_files(env,
- mp, __memp_count_files, &len, &i, flags)) != 0)
- return (ret);
-
- if (i == 0)
- return (0);
- len += sizeof(DB_MPOOL_FSTAT *); /* Trailing NULL */
+ while (*fspp == NULL) {
+ /* Count the MPOOLFILE structures. */
+ i = 0;
+ /*
+ * Allow space for the first __memp_get_files() to align the
+ * structure array to uintmax_t, DB_MPOOL_STAT's most
+ * restrictive field. [#23150]
+ */
+ len = sizeof(uintmax_t);
+ if ((ret = __memp_walk_files(env,
+ mp, __memp_count_files, &len, &i, flags)) != 0)
+ return (ret);
+
+ if (i == 0)
+ return (0);
+
+ /*
+ * Copy the number of DB_MPOOL_FSTAT entries and the number of
+ * bytes allocated for them into fsp_len. Do not count the space
+	 * reserved for alignment.
+ */
+ fsp_len[0] = i;
+ fsp_len[1] = len - sizeof(uintmax_t);
- /* Allocate space */
- if ((ret = __os_umalloc(env, len, fspp)) != 0)
- return (ret);
+ len += sizeof(DB_MPOOL_FSTAT *); /* Trailing NULL */
- tfsp = *fspp;
- *tfsp = NULL;
+ /* Allocate space */
+ if ((ret = __os_umalloc(env, len, fspp)) != 0)
+ return (ret);
- /*
- * Files may have been opened since we counted, don't walk
- * off the end of the allocated space.
- */
- if ((ret = __memp_walk_files(env,
- mp, __memp_get_files, &tfsp, &i, flags)) != 0)
- return (ret);
+ tfsp = *fspp;
+ *tfsp = NULL;
+ /*
+ * Files may have been opened since we counted, if we walk off
+ * the end of the allocated space specified in fsp_len, retry.
+ */
+ if ((ret = __memp_walk_files(env,
+ mp, __memp_get_files, &tfsp, fsp_len, flags)) != 0) {
+ if (ret == DB_BUFFER_SMALL) {
+ __os_ufree(env, *fspp);
+ *fspp = NULL;
+ tfsp = NULL;
+ } else
+ return (ret);
+ }
+ }
*++tfsp = NULL;
}
@@ -286,28 +314,35 @@ __memp_count_files(env, mfp, argp, count
* for the text file names.
*/
static int
-__memp_get_files(env, mfp, argp, countp, flags)
+__memp_get_files(env, mfp, argp, fsp_len, flags)
ENV *env;
MPOOLFILE *mfp;
void *argp;
- u_int32_t *countp;
+ u_int32_t fsp_len[];
u_int32_t flags;
{
DB_MPOOL *dbmp;
DB_MPOOL_FSTAT **tfsp, *tstruct;
char *name, *tname;
- size_t nlen;
+ size_t nlen, tlen;
- if (*countp == 0)
- return (0);
+ /* We walked through more files than argp was allocated for. */
+ if (fsp_len[0] == 0)
+ return DB_BUFFER_SMALL;
dbmp = env->mp_handle;
tfsp = *(DB_MPOOL_FSTAT ***)argp;
if (*tfsp == NULL) {
- /* Add 1 to count because we need to skip over the NULL. */
- tstruct = (DB_MPOOL_FSTAT *)(tfsp + *countp + 1);
- tname = (char *)(tstruct + *countp);
+ /*
+	 * Add 1 to count to skip over the NULL end marker.
+ * Align it further for DB_MPOOL_STAT's most restrictive field
+ * because uintmax_t might require stricter alignment than
+ * pointers; e.g., IP32 LL64 SPARC. [#23150]
+ */
+ tstruct = (DB_MPOOL_FSTAT *)&tfsp[fsp_len[0] + 1];
+ tstruct = ALIGNP_INC(tstruct, sizeof(uintmax_t));
+ tname = (char *)&tstruct[fsp_len[0]];
*tfsp = tstruct;
} else {
tstruct = *tfsp + 1;
@@ -317,6 +352,15 @@ __memp_get_files(env, mfp, argp, countp,
name = __memp_fns(dbmp, mfp);
nlen = strlen(name) + 1;
+
+ /* The space required for file names is larger than argp was allocated for. */
+ tlen = sizeof(DB_MPOOL_FSTAT *) + sizeof(DB_MPOOL_FSTAT) + nlen;
+ if (fsp_len[1] < tlen)
+ return DB_BUFFER_SMALL;
+ else
+ /* Count down the number of bytes left in argp. */
+ fsp_len[1] -= tlen;
+
memcpy(tname, name, nlen);
memcpy(tstruct, &mfp->stat, sizeof(mfp->stat));
tstruct->file_name = tname;
@@ -325,7 +369,9 @@ __memp_get_files(env, mfp, argp, countp,
tstruct->st_pagesize = mfp->pagesize;
*(DB_MPOOL_FSTAT ***)argp = tfsp;
- (*countp)--;
+
+ /* Count down the number of entries left in argp. */
+ fsp_len[0]--;
if (LF_ISSET(DB_STAT_CLEAR))
memset(&mfp->stat, 0, sizeof(mfp->stat));
--- a/src/mp/mp_sync.c
+++ b/src/mp/mp_sync.c
@@ -57,11 +57,13 @@ __memp_walk_files(env, mp, func, arg, co
if ((t_ret = func(env,
mfp, arg, countp, flags)) != 0 && ret == 0)
ret = t_ret;
- if (ret != 0 && !LF_ISSET(DB_STAT_MEMP_NOERROR))
+ if (ret != 0 &&
+ (!LF_ISSET(DB_STAT_MEMP_NOERROR) || ret == DB_BUFFER_SMALL))
break;
}
MUTEX_UNLOCK(env, hp->mtx_hash);
- if (ret != 0 && !LF_ISSET(DB_STAT_MEMP_NOERROR))
+ if (ret != 0 &&
+ (!LF_ISSET(DB_STAT_MEMP_NOERROR) || ret == DB_BUFFER_SMALL))
break;
}
return (ret);
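
The rewritten loop above is an instance of a general snapshot-allocate-fill-retry pattern: size the buffer from a first pass, fill it optimistically, and start over if the set grew in between (signalled here by DB_BUFFER_SMALL). A generic sketch of the pattern (illustrative; count_items/fill_items stand in for __memp_count_files/__memp_get_files):

#include <stdlib.h>

extern size_t count_items(void);		/* first pass: count */
extern int fill_items(int *buf, size_t n);	/* nonzero if n was too small */

int *snapshot_items(size_t *np)
{
	int *buf = NULL;
	size_t n = 0;

	while (buf == NULL) {
		if ((n = count_items()) == 0)
			return NULL;
		if ((buf = malloc(n * sizeof(*buf))) == NULL)
			return NULL;
		if (fill_items(buf, n) != 0) {	/* grew underneath us */
			free(buf);
			buf = NULL;		/* recount and retry */
		}
	}
	*np = n;
	return buf;
}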


@@ -0,0 +1,600 @@
--- a/src/dbinc_auto/int_def.in
+++ b/src/dbinc_auto/int_def.in
@@ -1373,6 +1373,7 @@
#define __memp_pgread __memp_pgread@DB_VERSION_UNIQUE_NAME@
#define __memp_pg __memp_pg@DB_VERSION_UNIQUE_NAME@
#define __memp_bhfree __memp_bhfree@DB_VERSION_UNIQUE_NAME@
+#define __memp_bh_clear_dirty __memp_bh_clear_dirty@DB_VERSION_UNIQUE_NAME@
#define __memp_fget_pp __memp_fget_pp@DB_VERSION_UNIQUE_NAME@
#define __memp_fget __memp_fget@DB_VERSION_UNIQUE_NAME@
#define __memp_fcreate_pp __memp_fcreate_pp@DB_VERSION_UNIQUE_NAME@
@@ -1397,6 +1398,7 @@
#define __memp_fclose __memp_fclose@DB_VERSION_UNIQUE_NAME@
#define __memp_mf_discard __memp_mf_discard@DB_VERSION_UNIQUE_NAME@
#define __memp_inmemlist __memp_inmemlist@DB_VERSION_UNIQUE_NAME@
+#define __memp_mf_mark_dead __memp_mf_mark_dead@DB_VERSION_UNIQUE_NAME@
#define __memp_fput_pp __memp_fput_pp@DB_VERSION_UNIQUE_NAME@
#define __memp_fput __memp_fput@DB_VERSION_UNIQUE_NAME@
#define __memp_unpin_buffers __memp_unpin_buffers@DB_VERSION_UNIQUE_NAME@
@@ -1455,6 +1457,7 @@
#define __mp_xxx_fh __mp_xxx_fh@DB_VERSION_UNIQUE_NAME@
#define __memp_sync_int __memp_sync_int@DB_VERSION_UNIQUE_NAME@
#define __memp_mf_sync __memp_mf_sync@DB_VERSION_UNIQUE_NAME@
+#define __memp_purge_dead_files __memp_purge_dead_files@DB_VERSION_UNIQUE_NAME@
#define __memp_trickle_pp __memp_trickle_pp@DB_VERSION_UNIQUE_NAME@
#define __mutex_alloc __mutex_alloc@DB_VERSION_UNIQUE_NAME@
#define __mutex_alloc_int __mutex_alloc_int@DB_VERSION_UNIQUE_NAME@
--- a/src/dbinc_auto/mp_ext.h
+++ b/src/dbinc_auto/mp_ext.h
@@ -16,6 +16,7 @@ int __memp_bhwrite __P((DB_MPOOL *, DB_M
int __memp_pgread __P((DB_MPOOLFILE *, BH *, int));
int __memp_pg __P((DB_MPOOLFILE *, db_pgno_t, void *, int));
int __memp_bhfree __P((DB_MPOOL *, REGINFO *, MPOOLFILE *, DB_MPOOL_HASH *, BH *, u_int32_t));
+void __memp_bh_clear_dirty __P((ENV*, DB_MPOOL_HASH *, BH *));
int __memp_fget_pp __P((DB_MPOOLFILE *, db_pgno_t *, DB_TXN *, u_int32_t, void *));
int __memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, DB_THREAD_INFO *, DB_TXN *, u_int32_t, void *));
int __memp_fcreate_pp __P((DB_ENV *, DB_MPOOLFILE **, u_int32_t));
@@ -40,6 +41,7 @@ int __memp_fclose_pp __P((DB_MPOOLFILE *
int __memp_fclose __P((DB_MPOOLFILE *, u_int32_t));
int __memp_mf_discard __P((DB_MPOOL *, MPOOLFILE *, int));
int __memp_inmemlist __P((ENV *, char ***, int *));
+void __memp_mf_mark_dead __P((DB_MPOOL *, MPOOLFILE *, int*));
int __memp_fput_pp __P((DB_MPOOLFILE *, void *, DB_CACHE_PRIORITY, u_int32_t));
int __memp_fput __P((DB_MPOOLFILE *, DB_THREAD_INFO *, void *, DB_CACHE_PRIORITY));
int __memp_unpin_buffers __P((ENV *, DB_THREAD_INFO *));
@@ -98,6 +100,7 @@ int __memp_fsync __P((DB_MPOOLFILE *));
int __mp_xxx_fh __P((DB_MPOOLFILE *, DB_FH **));
int __memp_sync_int __P((ENV *, DB_MPOOLFILE *, u_int32_t, u_int32_t, u_int32_t *, int *));
int __memp_mf_sync __P((DB_MPOOL *, MPOOLFILE *, int));
+int __memp_purge_dead_files __P((ENV *));
int __memp_trickle_pp __P((DB_ENV *, int, int *));
#if defined(__cplusplus)
--- a/src/mp/mp_bh.c
+++ b/src/mp/mp_bh.c
@@ -474,11 +474,8 @@ file_dead:
if (F_ISSET(bhp, BH_DIRTY | BH_TRASH)) {
MUTEX_LOCK(env, hp->mtx_hash);
DB_ASSERT(env, !SH_CHAIN_HASNEXT(bhp, vc));
- if (ret == 0 && F_ISSET(bhp, BH_DIRTY)) {
- F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE);
- DB_ASSERT(env, atomic_read(&hp->hash_page_dirty) > 0);
- atomic_dec(env, &hp->hash_page_dirty);
- }
+ if (ret == 0)
+ __memp_bh_clear_dirty(env, hp, bhp);
/* put the page back if necessary. */
if ((ret != 0 || BH_REFCOUNT(bhp) > 1) &&
@@ -688,3 +685,29 @@ no_hp: if (mfp != NULL)
return (ret);
}
+
+/*
+ * __memp_bh_clear_dirty --
+ *	Clear the dirty flag of a buffer. Calls on the same buffer must be
+ * serialized to get the accounting correct. This can be achieved by
+ * acquiring an exclusive lock on the buffer, a shared lock on the
+ * buffer plus an exclusive lock on the hash bucket, or some other
+ * mechanism that guarantees single-thread access to the entire region
+ * (e.g. during __memp_region_bhfree()).
+ *
+ * PUBLIC: void __memp_bh_clear_dirty __P((ENV*, DB_MPOOL_HASH *, BH *));
+ */
+void
+__memp_bh_clear_dirty(env, hp, bhp)
+ ENV *env;
+ DB_MPOOL_HASH *hp;
+ BH *bhp;
+{
+ COMPQUIET(env, env);
+ if (F_ISSET(bhp, BH_DIRTY)) {
+ F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE);
+ DB_ASSERT(env, atomic_read(&hp->hash_page_dirty) > 0);
+ (void)atomic_dec(env, &hp->hash_page_dirty);
+ }
+}
+
--- a/src/mp/mp_fget.c
+++ b/src/mp/mp_fget.c
@@ -439,12 +439,7 @@ thawed: need_free = (atomic_dec(env, &
if (flags == DB_MPOOL_FREE) {
freebuf: MUTEX_LOCK(env, hp->mtx_hash);
h_locked = 1;
- if (F_ISSET(bhp, BH_DIRTY)) {
- F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE);
- DB_ASSERT(env,
- atomic_read(&hp->hash_page_dirty) > 0);
- atomic_dec(env, &hp->hash_page_dirty);
- }
+ __memp_bh_clear_dirty(env, hp, bhp);
/*
* If the buffer we found is already freed, we're done.
--- a/src/mp/mp_fopen.c
+++ b/src/mp/mp_fopen.c
@@ -14,6 +14,7 @@
#include "dbinc/db_page.h"
#include "dbinc/hash.h"
+static int __memp_count_dead_mutex __P((DB_MPOOL *, u_int32_t *));
static int __memp_mpf_alloc __P((DB_MPOOL *,
DB_MPOOLFILE *, const char *, u_int32_t, u_int32_t, MPOOLFILE **));
static int __memp_mpf_find __P((ENV *,
@@ -711,7 +712,11 @@ __memp_mpf_find(env, dbmfp, hp, path, fl
*/
if (LF_ISSET(DB_TRUNCATE)) {
MUTEX_LOCK(env, mfp->mutex);
- mfp->deadfile = 1;
+ /*
+ * We cannot purge dead files here, because the caller
+ * is holding the mutex of the hash bucket of mfp.
+ */
+ __memp_mf_mark_dead(dbmp, mfp, NULL);
MUTEX_UNLOCK(env, mfp->mutex);
continue;
}
@@ -909,10 +914,11 @@ __memp_fclose(dbmfp, flags)
MPOOLFILE *mfp;
char *rpath;
u_int32_t ref;
- int deleted, ret, t_ret;
+ int deleted, purge_dead, ret, t_ret;
env = dbmfp->env;
dbmp = env->mp_handle;
+ purge_dead = 0;
ret = 0;
/*
@@ -1006,7 +1012,7 @@ __memp_fclose(dbmfp, flags)
if (--mfp->mpf_cnt == 0 || LF_ISSET(DB_MPOOL_DISCARD)) {
if (LF_ISSET(DB_MPOOL_DISCARD) ||
F_ISSET(mfp, MP_TEMP) || mfp->unlink_on_close) {
- mfp->deadfile = 1;
+ __memp_mf_mark_dead(dbmp, mfp, &purge_dead);
}
if (mfp->unlink_on_close) {
if ((t_ret = __db_appname(dbmp->env, DB_APP_DATA,
@@ -1039,6 +1045,8 @@ __memp_fclose(dbmfp, flags)
}
if (!deleted && !LF_ISSET(DB_MPOOL_NOLOCK))
MUTEX_UNLOCK(env, mfp->mutex);
+ if (purge_dead)
+ (void)__memp_purge_dead_files(env);
done: /* Discard the DB_MPOOLFILE structure. */
if (dbmfp->pgcookie != NULL) {
@@ -1093,7 +1101,7 @@ __memp_mf_discard(dbmp, mfp, hp_locked)
* mutex so we don't deadlock. Make sure nobody ever looks at this
* structure again.
*/
- mfp->deadfile = 1;
+ __memp_mf_mark_dead(dbmp, mfp, NULL);
/* Discard the mutex we're holding and return it too the pool. */
MUTEX_UNLOCK(env, mfp->mutex);
@@ -1218,3 +1226,104 @@ nomem: MUTEX_UNLOCK(env, hp->mtx_hash);
*namesp = NULL;
return (ret);
}
+
+/*
+ * __memp_mf_mark_dead --
+ * Mark an MPOOLFILE as dead because its contents are no longer necessary.
+ * This happens when removing, truncation, or closing an unnamed in-memory
+ * database. Return, in the purgep parameter, whether the caller should
+ * call __memp_purge_dead_files() after the lock on mfp is released. The
+ * caller must hold an exclusive lock on the mfp handle.
+ *
+ * PUBLIC: void __memp_mf_mark_dead __P((DB_MPOOL *, MPOOLFILE *, int*));
+ */
+void
+__memp_mf_mark_dead(dbmp, mfp, purgep)
+ DB_MPOOL *dbmp;
+ MPOOLFILE *mfp;
+ int *purgep;
+{
+ ENV *env;
+#ifdef HAVE_MUTEX_SUPPORT
+ REGINFO *infop;
+ DB_MUTEXREGION *mtxregion;
+ u_int32_t mutex_max, mutex_inuse, dead_mutex;
+#endif
+
+ if (purgep != NULL)
+ *purgep = 0;
+
+ env = dbmp->env;
+
+#ifdef HAVE_MUTEX_SUPPORT
+ MUTEX_REQUIRED(env, mfp->mutex);
+
+ if (MUTEX_ON(env) && mfp->deadfile == 0) {
+ infop = &env->mutex_handle->reginfo;
+ mtxregion = infop->primary;
+
+ mutex_inuse = mtxregion->stat.st_mutex_inuse;
+ if ((mutex_max = env->dbenv->mutex_max) == 0)
+ mutex_max = infop->rp->max / mtxregion->mutex_size;
+
+ /*
+ * Purging dead pages requires a full scan of the entire cache
+ * buffer, so it is a slow operation. We only want to do it
+ * when it is necessary and provides enough benefits. Below is
+ * a simple heuristic that determines when to purge all dead
+ * pages.
+ */
+ if (purgep != NULL && mutex_inuse > mutex_max - 200) {
+ /*
+ * If the mutex region is almost full and there are
+ * many mutexes held by dead files, purge dead files.
+ */
+ (void)__memp_count_dead_mutex(dbmp, &dead_mutex);
+ dead_mutex += mfp->block_cnt + 1;
+
+ if (dead_mutex > mutex_inuse / 20)
+ *purgep = 1;
+ }
+ }
+#endif
+
+ mfp->deadfile = 1;
+}
+
+/*
+ * __memp_count_dead_mutex --
+ * Estimate the number of mutexes held by dead files.
+ */
+static int
+__memp_count_dead_mutex(dbmp, dead_mutex)
+ DB_MPOOL *dbmp;
+ u_int32_t *dead_mutex;
+{
+ ENV *env;
+ DB_MPOOL_HASH *hp;
+ MPOOL *mp;
+ MPOOLFILE *mfp;
+ u_int32_t mutex_per_file;
+ int busy, i;
+
+ env = dbmp->env;
+ *dead_mutex = 0;
+ mutex_per_file = 1;
+#ifndef HAVE_ATOMICFILEREAD
+ mutex_per_file = 2;
+#endif
+ mp = dbmp->reginfo[0].primary;
+ hp = R_ADDR(dbmp->reginfo, mp->ftab);
+ for (i = 0; i < MPOOL_FILE_BUCKETS; i++, hp++) {
+ busy = MUTEX_TRYLOCK(env, hp->mtx_hash);
+ if (busy)
+ continue;
+ SH_TAILQ_FOREACH(mfp, &hp->hash_bucket, q, __mpoolfile) {
+ if (mfp->deadfile)
+ *dead_mutex += mfp->block_cnt + mutex_per_file;
+ }
+ MUTEX_UNLOCK(env, hp->mtx_hash);
+ }
+
+ return (0);
+}
--- a/src/mp/mp_method.c
+++ b/src/mp/mp_method.c
@@ -640,7 +640,7 @@ __memp_nameop(env, fileid, newname, full
MPOOLFILE *mfp;
roff_t newname_off;
u_int32_t bucket;
- int locked, ret;
+ int locked, purge_dead, ret;
size_t nlen;
void *p;
@@ -657,6 +657,7 @@ __memp_nameop(env, fileid, newname, full
nhp = NULL;
p = NULL;
locked = ret = 0;
+ purge_dead = 0;
if (!MPOOL_ON(env))
goto fsop;
@@ -749,7 +750,7 @@ __memp_nameop(env, fileid, newname, full
*/
if (mfp->no_backing_file)
mfp->mpf_cnt--;
- mfp->deadfile = 1;
+ __memp_mf_mark_dead(dbmp, mfp, &purge_dead);
MUTEX_UNLOCK(env, mfp->mutex);
} else {
/*
@@ -808,6 +809,12 @@ err: if (p != NULL) {
if (nhp != NULL && nhp != hp)
MUTEX_UNLOCK(env, nhp->mtx_hash);
}
+ /*
+ * __memp_purge_dead_files() must be called when the hash bucket is
+ * unlocked.
+ */
+ if (purge_dead)
+ (void)__memp_purge_dead_files(env);
return (ret);
}
--- a/src/mp/mp_sync.c
+++ b/src/mp/mp_sync.c
@@ -26,6 +26,7 @@ static int __memp_close_flush_files __P(
static int __memp_sync_files __P((ENV *));
static int __memp_sync_file __P((ENV *,
MPOOLFILE *, void *, u_int32_t *, u_int32_t));
+static inline void __update_err_ret(int, int*);
/*
* __memp_walk_files --
@@ -965,3 +966,123 @@ __bhcmp(p1, p2)
return (1);
return (0);
}
+
+/*
+ * __memp_purge_dead_files --
+ * Remove all dead files and their buffers from the mpool. The caller
+ * cannot hold any lock on the dead MPOOLFILE handles, their buffers
+ * or their hash buckets.
+ *
+ * PUBLIC: int __memp_purge_dead_files __P((ENV *));
+ */
+int
+__memp_purge_dead_files(env)
+ ENV *env;
+{
+ BH *bhp;
+ DB_MPOOL *dbmp;
+ DB_MPOOL_HASH *hp, *hp_end;
+ REGINFO *infop;
+ MPOOL *c_mp, *mp;
+ MPOOLFILE *mfp;
+ u_int32_t i_cache;
+ int ret, t_ret, h_lock;
+
+ if (!MPOOL_ON(env))
+ return (0);
+
+ dbmp = env->mp_handle;
+ mp = dbmp->reginfo[0].primary;
+ ret = t_ret = h_lock = 0;
+
+ /*
+ * Walk each cache's list of buffers and free all buffers whose
+ * MPOOLFILE is marked as dead.
+ */
+ for (i_cache = 0; i_cache < mp->nreg; i_cache++) {
+ infop = &dbmp->reginfo[i_cache];
+ c_mp = infop->primary;
+
+ hp = R_ADDR(infop, c_mp->htab);
+ hp_end = &hp[c_mp->htab_buckets];
+ for (; hp < hp_end; hp++) {
+ /* Skip empty buckets. */
+ if (SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL)
+ continue;
+
+ /*
+ * Search for a dead buffer. Other places that call
+ * __memp_bhfree() acquire the buffer lock before the
+ * hash bucket lock. Even though we acquire the two
+ * locks in reverse order, we cannot deadlock here
+ * because we don't block waiting for the locks.
+ */
+ t_ret = MUTEX_TRYLOCK(env, hp->mtx_hash);
+ if (t_ret != 0) {
+ __update_err_ret(t_ret, &ret);
+ continue;
+ }
+ h_lock = 1;
+ SH_TAILQ_FOREACH(bhp, &hp->hash_bucket, hq, __bh) {
+ /* Skip buffers that are being used. */
+ if (BH_REFCOUNT(bhp) > 0)
+ continue;
+
+ mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset);
+ if (!mfp->deadfile)
+ continue;
+
+ /* Found a dead buffer. Prepare to free it. */
+ t_ret = MUTEX_TRYLOCK(env, bhp->mtx_buf);
+ if (t_ret != 0) {
+ __update_err_ret(t_ret, &ret);
+ continue;
+ }
+
+ DB_ASSERT(env, (!F_ISSET(bhp, BH_EXCLUSIVE) &&
+ BH_REFCOUNT(bhp) == 0));
+ F_SET(bhp, BH_EXCLUSIVE);
+ (void)atomic_inc(env, &bhp->ref);
+
+ __memp_bh_clear_dirty(env, hp, bhp);
+
+ /*
+ * Free the buffer. The buffer and hash bucket
+ * are unlocked by __memp_bhfree.
+ */
+ if ((t_ret = __memp_bhfree(dbmp, infop, mfp,
+ hp, bhp, BH_FREE_FREEMEM)) == 0)
+ /*
+ * Decrement hp, so the next turn will
+ * search the same bucket again.
+ */
+ hp--;
+ else
+ __update_err_ret(t_ret, &ret);
+
+ /*
+ * The hash bucket is unlocked, we need to
+ * start over again.
+ */
+ h_lock = 0;
+ break;
+ }
+
+ if (h_lock) {
+ MUTEX_UNLOCK(env, hp->mtx_hash);
+ h_lock = 0;
+ }
+ }
+ }
+
+ return (ret);
+}
+
+static inline void
+__update_err_ret(t_ret, retp)
+ int t_ret;
+ int *retp;
+{
+ if (t_ret != 0 && t_ret != DB_LOCK_NOTGRANTED && *retp == 0)
+ *retp = t_ret;
+}
--- a/src/mp/mp_trickle.c
+++ b/src/mp/mp_trickle.c
@@ -67,6 +67,10 @@ __memp_trickle(env, pct, nwrotep)
return (EINVAL);
}
+ /* First we purge all dead files and their buffers. */
+ if ((ret = __memp_purge_dead_files(env)) != 0)
+ return (ret);
+
/*
* Loop through the caches counting total/dirty buffers.
*
--- a/src/mutex/mut_region.c
+++ b/src/mutex/mut_region.c
@@ -17,7 +17,7 @@
static db_size_t __mutex_align_size __P((ENV *));
static int __mutex_region_init __P((ENV *, DB_MUTEXMGR *));
static size_t __mutex_region_size __P((ENV *));
-static size_t __mutex_region_max __P((ENV *));
+static size_t __mutex_region_max __P((ENV *, u_int32_t));
/*
* __mutex_open --
@@ -34,7 +34,7 @@ __mutex_open(env, create_ok)
DB_MUTEXMGR *mtxmgr;
DB_MUTEXREGION *mtxregion;
size_t size;
- u_int32_t cpu_count;
+ u_int32_t cpu_count, mutex_needed;
int ret;
#ifndef HAVE_ATOMIC_SUPPORT
u_int i;
@@ -61,19 +61,20 @@ __mutex_open(env, create_ok)
}
/*
- * If the user didn't set an absolute value on the number of mutexes
- * we'll need, figure it out. We're conservative in our allocation,
- * we need mutexes for DB handles, group-commit queues and other things
- * applications allocate at run-time. The application may have kicked
- * up our count to allocate its own mutexes, add that in.
+ * Figure out the number of mutexes we'll need. We're conservative in
+ * our allocation, we need mutexes for DB handles, group-commit queues
+ * and other things applications allocate at run-time. The application
+ * may have kicked up our count to allocate its own mutexes, add that
+ * in.
*/
+ mutex_needed =
+ __lock_region_mutex_count(env) +
+ __log_region_mutex_count(env) +
+ __memp_region_mutex_count(env) +
+ __txn_region_mutex_count(env);
if (dbenv->mutex_cnt == 0 &&
F_ISSET(env, ENV_PRIVATE | ENV_THREAD) != ENV_PRIVATE)
- dbenv->mutex_cnt =
- __lock_region_mutex_count(env) +
- __log_region_mutex_count(env) +
- __memp_region_mutex_count(env) +
- __txn_region_mutex_count(env);
+ dbenv->mutex_cnt = mutex_needed;
if (dbenv->mutex_max != 0 && dbenv->mutex_cnt > dbenv->mutex_max)
dbenv->mutex_cnt = dbenv->mutex_max;
@@ -90,8 +91,8 @@ __mutex_open(env, create_ok)
size = __mutex_region_size(env);
if (create_ok)
F_SET(&mtxmgr->reginfo, REGION_CREATE_OK);
- if ((ret = __env_region_attach(env,
- &mtxmgr->reginfo, size, size + __mutex_region_max(env))) != 0)
+ if ((ret = __env_region_attach(env, &mtxmgr->reginfo,
+ size, size + __mutex_region_max(env, mutex_needed))) != 0)
goto err;
/* If we created the region, initialize it. */
@@ -352,9 +353,13 @@ __mutex_region_size(env)
s = sizeof(DB_MUTEXMGR) + 1024;
- /* We discard one mutex for the OOB slot. */
+ /*
+ * We discard one mutex for the OOB slot. Make sure mutex_cnt doesn't
+ * overflow.
+ */
s += __env_alloc_size(
- (dbenv->mutex_cnt + 1) *__mutex_align_size(env));
+ (dbenv->mutex_cnt + (dbenv->mutex_cnt == UINT32_MAX ? 0 : 1)) *
+ __mutex_align_size(env));
return (s);
}
@@ -364,28 +369,42 @@ __mutex_region_size(env)
* Return the amount of space needed to reach the maximum size.
*/
static size_t
-__mutex_region_max(env)
+__mutex_region_max(env, mutex_needed)
ENV *env;
+ u_int32_t mutex_needed;
{
DB_ENV *dbenv;
- u_int32_t max;
+ u_int32_t max, mutex_cnt;
dbenv = env->dbenv;
+ mutex_cnt = dbenv->mutex_cnt;
- if ((max = dbenv->mutex_max) == 0) {
+ /*
+ * We want to limit the region size to accommodate at most UINT32_MAX
+ * mutexes. If mutex_cnt is UINT32_MAX, no more space is allowed.
+ */
+ if ((max = dbenv->mutex_max) == 0 && mutex_cnt != UINT32_MAX)
if (F_ISSET(env, ENV_PRIVATE | ENV_THREAD) == ENV_PRIVATE)
- max = dbenv->mutex_inc + 1;
- else
+ if (dbenv->mutex_inc + 1 < UINT32_MAX - mutex_cnt)
+ max = dbenv->mutex_inc + 1 + mutex_cnt;
+ else
+ max = UINT32_MAX;
+ else {
max = __lock_region_mutex_max(env) +
__txn_region_mutex_max(env) +
__log_region_mutex_max(env) +
dbenv->mutex_inc + 100;
- } else if (max <= dbenv->mutex_cnt)
+ if (max < UINT32_MAX - mutex_needed)
+ max += mutex_needed;
+ else
+ max = UINT32_MAX;
+ }
+
+ if (max <= mutex_cnt)
return (0);
else
- max -= dbenv->mutex_cnt;
-
- return ( __env_alloc_size(max * __mutex_align_size(env)));
+ return (__env_alloc_size(
+ (max - mutex_cnt) * __mutex_align_size(env)));
}
#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
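
One detail worth noting in __memp_purge_dead_files() above: it takes the hash-bucket lock before the buffer lock, the reverse of the order used elsewhere, which is only deadlock-safe because both acquisitions are trylocks that back off instead of blocking. A pthread sketch (illustrative) of that pattern:

#include <pthread.h>

static pthread_mutex_t bucket = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t buffer = PTHREAD_MUTEX_INITIALIZER;

/* Returns 1 if the dead buffer was freed, 0 if somebody held a lock. */
int try_purge_one(void)
{
	if (pthread_mutex_trylock(&bucket) != 0)
		return 0;			/* busy: skip, never wait */
	if (pthread_mutex_trylock(&buffer) != 0) {
		pthread_mutex_unlock(&bucket);	/* back off, no deadlock */
		return 0;
	}
	/* ... free the dead buffer here ... */
	pthread_mutex_unlock(&buffer);
	pthread_mutex_unlock(&bucket);
	return 1;
}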


@@ -0,0 +1,20 @@
--- a/lang/sql/sqlite/tool/lemon.c
+++ b/lang/sql/sqlite/tool/lemon.c
@@ -3428,7 +3428,7 @@ void print_stack_union(
int maxdtlength; /* Maximum length of any ".datatype" field. */
char *stddt; /* Standardized name for a datatype */
int i,j; /* Loop counters */
- int hash; /* For hashing the name of a type */
+ unsigned hash; /* For hashing the name of a type */
const char *name; /* Name of the parser */
/* Allocate and initialize types[] and allocate stddt[] */
@@ -3491,7 +3491,7 @@ void print_stack_union(
break;
}
hash++;
- if( hash>=arraysize ) hash = 0;
+ if( hash>=(unsigned)arraysize ) hash = 0;
}
if( types[hash]==0 ){
sp->dtnum = hash + 1;


@@ -0,0 +1,18 @@
Author: Filip Januš <fjanus@redhat.com>
Date: 6 Sep 2021
Related: https://bugzilla.redhat.com/show_bug.cgi?id=1992402
Patch was created based on the discussion in the previous link
--- a/src/os/os_map.c
+++ b/src/os/os_map.c
@@ -213,7 +213,10 @@ __os_attach(env, infop, rp)
if (rp->max < rp->size)
rp->max = rp->size;
if (ret == 0 && F_ISSET(infop, REGION_CREATE)) {
- if (F_ISSET(dbenv, DB_ENV_REGION_INIT))
+
+ rp->size = rp->max;
+
+ if (F_ISSET(dbenv, DB_ENV_REGION_INIT))
ret = __db_file_write(env, infop->fhp,
rp->size / MEGABYTE, rp->size % MEGABYTE, 0x00);
else


@@ -0,0 +1,693 @@
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -282,6 +282,8 @@ __bamc_refresh(dbc)
*
* Recno uses the btree bt_ovflsize value -- it's close enough.
*/
+ if (t->bt_minkey == 0)
+ return (DB_RECOVER);
cp->ovflsize = B_MINKEY_TO_OVFLSIZE(
dbp, F_ISSET(dbc, DBC_OPD) ? 2 : t->bt_minkey, dbp->pgsize);
--- a/src/btree/bt_verify.c
+++ b/src/btree/bt_verify.c
@@ -611,7 +611,11 @@ __bam_vrfy_inp(dbp, vdp, h, pgno, nentri
isbad = 1;
goto err;
default:
- DB_ASSERT(env, ret != 0);
+ if (ret == 0) {
+ isbad = 1;
+ ret = DB_VERIFY_FATAL;
+ goto err;
+ }
break;
}
@@ -922,7 +926,7 @@ __bam_vrfy_itemorder(dbp, vdp, ip, h, pg
DBT dbta, dbtb, dup_1, dup_2, *p1, *p2, *tmp;
ENV *env;
PAGE *child;
- db_pgno_t cpgno;
+ db_pgno_t cpgno, grandparent;
VRFY_PAGEINFO *pip;
db_indx_t i, *inp;
int adj, cmp, freedup_1, freedup_2, isbad, ret, t_ret;
@@ -954,7 +958,8 @@ __bam_vrfy_itemorder(dbp, vdp, ip, h, pg
buf1 = buf2 = NULL;
- DB_ASSERT(env, !LF_ISSET(DB_NOORDERCHK));
+ if (LF_ISSET(DB_NOORDERCHK))
+ return (EINVAL);
dupfunc = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare;
if (TYPE(h) == P_LDUP)
@@ -963,6 +968,7 @@ __bam_vrfy_itemorder(dbp, vdp, ip, h, pg
func = __bam_defcmp;
if (dbp->bt_internal != NULL) {
bt = (BTREE *)dbp->bt_internal;
+ grandparent = bt->bt_root;
if (TYPE(h) == P_IBTREE && (bt->bt_compare != NULL ||
dupfunc != __bam_defcmp)) {
/*
@@ -974,8 +980,24 @@ __bam_vrfy_itemorder(dbp, vdp, ip, h, pg
*/
mpf = dbp->mpf;
child = h;
+ cpgno = pgno;
while (TYPE(child) == P_IBTREE) {
+ if (NUM_ENT(child) == 0) {
+ EPRINT((env, DB_STR_A("1088",
+ "Page %lu: internal page is empty and should not be",
+ "%lu"), (u_long)cpgno));
+ ret = DB_VERIFY_BAD;
+ goto err;
+ }
bi = GET_BINTERNAL(dbp, child, 0);
+ if (grandparent == bi->pgno) {
+ EPRINT((env, DB_STR_A("5552",
+ "Page %lu: found twice in the btree",
+ "%lu"), (u_long)grandparent));
+ ret = DB_VERIFY_FATAL;
+ goto err;
+ } else
+ grandparent = cpgno;
cpgno = bi->pgno;
if (child != h &&
(ret = __memp_fput(mpf,
@@ -1231,7 +1253,10 @@ overflow: if (!ovflok) {
*/
if (dup_1.data == NULL ||
dup_2.data == NULL) {
- DB_ASSERT(env, !ovflok);
+ if (ovflok) {
+ isbad = 1;
+ goto err;
+ }
if (pip != NULL)
F_SET(pip,
VRFY_INCOMPLETE);
@@ -1569,9 +1594,10 @@ bad_prev: isbad = 1;
(ret = __db_vrfy_ovfl_structure(dbp, vdp,
child->pgno, child->tlen,
flags | DB_ST_OVFL_LEAF)) != 0) {
- if (ret == DB_VERIFY_BAD)
+ if (ret == DB_VERIFY_BAD) {
isbad = 1;
- else
+ break;
+ } else
goto done;
}
@@ -1645,9 +1671,10 @@ bad_prev: isbad = 1;
stflags | DB_ST_TOPLEVEL,
NULL, NULL, NULL)) != 0) {
if (ret ==
- DB_VERIFY_BAD)
+ DB_VERIFY_BAD) {
isbad = 1;
- else
+ break;
+ } else
goto err;
}
}
@@ -1790,7 +1817,10 @@ bad_prev: isbad = 1;
*/
/* Otherwise, __db_vrfy_childput would be broken. */
- DB_ASSERT(env, child->refcnt >= 1);
+ if (child->refcnt < 1) {
+ isbad = 1;
+ goto err;
+ }
/*
* An overflow referenced more than twice here
@@ -1807,9 +1837,10 @@ bad_prev: isbad = 1;
if ((ret = __db_vrfy_ovfl_structure(dbp,
vdp, child->pgno, child->tlen,
flags)) != 0) {
- if (ret == DB_VERIFY_BAD)
+ if (ret == DB_VERIFY_BAD) {
isbad = 1;
- else
+ break;
+ } else
goto done;
}
}
@@ -1847,9 +1878,10 @@ bad_prev: isbad = 1;
if ((ret = __bam_vrfy_subtree(dbp, vdp, li->pgno,
i == 0 ? NULL : li, ri, flags, &child_level,
&child_nrecs, NULL)) != 0) {
- if (ret == DB_VERIFY_BAD)
+ if (ret == DB_VERIFY_BAD) {
isbad = 1;
- else
+ break;
+ } else
goto done;
}
@@ -2675,7 +2707,11 @@ __bam_meta2pgset(dbp, vdp, btmeta, flags
db_pgno_t current, p;
int err_ret, ret;
- DB_ASSERT(dbp->env, pgset != NULL);
+ if (pgset == NULL) {
+ EPRINT((dbp->env, DB_STR("5542",
+ "Error, database contains no visible pages.")));
+ return (DB_RUNRECOVERY);
+ }
mpf = dbp->mpf;
h = NULL;
--- a/src/db/db_conv.c
+++ b/src/db/db_conv.c
@@ -493,8 +493,11 @@ __db_byteswap(dbp, pg, h, pagesize, pgin
db_indx_t i, *inp, len, tmp;
u_int8_t *end, *p, *pgend;
- if (pagesize == 0)
- return (0);
+ /* This function is also used to byteswap logs, so
+ * the pagesize might not be an actual page size.
+ */
+ if (!(pagesize >= 24 && pagesize <= DB_MAX_PGSIZE))
+ return (EINVAL);
if (pgin) {
M_32_SWAP(h->lsn.file);
@@ -513,26 +516,41 @@ __db_byteswap(dbp, pg, h, pagesize, pgin
pgend = (u_int8_t *)h + pagesize;
inp = P_INP(dbp, h);
- if ((u_int8_t *)inp >= pgend)
- goto out;
+ if ((u_int8_t *)inp > pgend)
+ return (__db_pgfmt(env, pg));
switch (TYPE(h)) {
case P_HASH_UNSORTED:
case P_HASH:
for (i = 0; i < NUM_ENT(h); i++) {
+ if ((u_int8_t*)(inp + i) >= pgend)
+ return (__db_pgfmt(env, pg));
+ if (inp[i] == 0)
+ continue;
if (pgin)
M_16_SWAP(inp[i]);
+ if (inp[i] >= pagesize)
+ return (__db_pgfmt(env, pg));
- if (P_ENTRY(dbp, h, i) >= pgend)
- continue;
+ if (P_ENTRY(dbp, h, i) >= pgend)
+ return (__db_pgfmt(env, pg));
switch (HPAGE_TYPE(dbp, h, i)) {
case H_KEYDATA:
break;
case H_DUPLICATE:
+ if (LEN_HITEM(dbp, h, pagesize, i) <
+ HKEYDATA_SIZE(0))
+ return (__db_pgfmt(env, pg));
+
len = LEN_HKEYDATA(dbp, h, pagesize, i);
p = HKEYDATA_DATA(P_ENTRY(dbp, h, i));
- for (end = p + len; p < end;) {
+
+ end = p + len;
+ if (end > pgend)
+ return (__db_pgfmt(env, pg));
+
+ while (p < end) {
if (pgin) {
P_16_SWAP(p);
memcpy(&tmp,
@@ -544,14 +562,20 @@ __db_byteswap(dbp, pg, h, pagesize, pgin
SWAP16(p);
}
p += tmp;
+ if (p >= end)
+ return (__db_pgfmt(env, pg));
SWAP16(p);
}
break;
case H_OFFDUP:
+ if ((inp[i] + HOFFDUP_SIZE) > pagesize)
+ return (__db_pgfmt(env, pg));
p = HOFFPAGE_PGNO(P_ENTRY(dbp, h, i));
SWAP32(p); /* pgno */
break;
case H_OFFPAGE:
+ if ((inp[i] + HOFFPAGE_SIZE) > pagesize)
+ return (__db_pgfmt(env, pg));
p = HOFFPAGE_PGNO(P_ENTRY(dbp, h, i));
SWAP32(p); /* pgno */
SWAP32(p); /* tlen */
@@ -559,7 +583,6 @@ __db_byteswap(dbp, pg, h, pagesize, pgin
default:
return (__db_pgfmt(env, pg));
}
-
}
/*
@@ -576,8 +599,12 @@ __db_byteswap(dbp, pg, h, pagesize, pgin
case P_LDUP:
case P_LRECNO:
for (i = 0; i < NUM_ENT(h); i++) {
+ if ((u_int8_t *)(inp + i) >= pgend)
+ return (__db_pgfmt(env, pg));
if (pgin)
M_16_SWAP(inp[i]);
+ if (inp[i] >= pagesize)
+ return (__db_pgfmt(env, pg));
/*
* In the case of on-page duplicates, key information
@@ -597,7 +624,7 @@ __db_byteswap(dbp, pg, h, pagesize, pgin
bk = GET_BKEYDATA(dbp, h, i);
if ((u_int8_t *)bk >= pgend)
- continue;
+ return (__db_pgfmt(env, pg));
switch (B_TYPE(bk->type)) {
case B_KEYDATA:
M_16_SWAP(bk->len);
@@ -605,6 +632,8 @@ __db_byteswap(dbp, pg, h, pagesize, pgin
case B_DUPLICATE:
case B_OVERFLOW:
bo = (BOVERFLOW *)bk;
+ if (((u_int8_t *)bo + BOVERFLOW_SIZE) > pgend)
+ return (__db_pgfmt(env, pg));
M_32_SWAP(bo->pgno);
M_32_SWAP(bo->tlen);
break;
@@ -618,12 +647,17 @@ __db_byteswap(dbp, pg, h, pagesize, pgin
break;
case P_IBTREE:
for (i = 0; i < NUM_ENT(h); i++) {
+ if ((u_int8_t *)(inp + i) > pgend)
+ return (__db_pgfmt(env, pg));
if (pgin)
M_16_SWAP(inp[i]);
+ if ((u_int16_t)(inp[i] +
+ BINTERNAL_SIZE(0) - 1) > pagesize)
+ break;
bi = GET_BINTERNAL(dbp, h, i);
- if ((u_int8_t *)bi >= pgend)
- continue;
+ if (((u_int8_t *)bi + BINTERNAL_SIZE(0)) > pgend)
+ return (__db_pgfmt(env, pg));
M_16_SWAP(bi->len);
M_32_SWAP(bi->pgno);
@@ -634,6 +668,10 @@ __db_byteswap(dbp, pg, h, pagesize, pgin
break;
case B_DUPLICATE:
case B_OVERFLOW:
+ if ((u_int16_t)(inp[i] +
+ BINTERNAL_SIZE(BOVERFLOW_SIZE) - 1) >
+ pagesize)
+ goto out;
bo = (BOVERFLOW *)bi->data;
M_32_SWAP(bo->pgno);
M_32_SWAP(bo->tlen);
@@ -648,12 +686,16 @@ __db_byteswap(dbp, pg, h, pagesize, pgin
break;
case P_IRECNO:
for (i = 0; i < NUM_ENT(h); i++) {
+ if ((u_int8_t *)(inp + i) >= pgend)
+ return (__db_pgfmt(env, pg));
if (pgin)
M_16_SWAP(inp[i]);
+ if (inp[i] >= pagesize)
+ return (__db_pgfmt(env, pg));
ri = GET_RINTERNAL(dbp, h, i);
- if ((u_int8_t *)ri >= pgend)
- continue;
+ if ((((u_int8_t *)ri) + RINTERNAL_SIZE) > pgend)
+ return (__db_pgfmt(env, pg));
M_32_SWAP(ri->pgno);
M_32_SWAP(ri->nrecs);
--- a/src/db/db_vrfy.c
+++ b/src/db/db_vrfy.c
@@ -375,8 +375,10 @@ __db_verify(dbp, ip, name, subdb, handle
vdp, name, 0, lp, rp, flags)) != 0) {
if (t_ret == DB_VERIFY_BAD)
isbad = 1;
- else
- goto err;
+ else {
+ ret = t_ret;
+ goto err;
+ }
}
/*
@@ -764,9 +766,10 @@ __db_vrfy_walkpages(dbp, vdp, handle, ca
*/
if ((t_ret = __memp_fget(mpf, &i,
vdp->thread_info, NULL, 0, &h)) != 0) {
- if (dbp->type == DB_HASH ||
+ if ((dbp->type == DB_HASH ||
(dbp->type == DB_QUEUE &&
- F_ISSET(dbp, DB_AM_INMEM))) {
+ F_ISSET(dbp, DB_AM_INMEM))) &&
+ t_ret != DB_RUNRECOVERY) {
if ((t_ret =
__db_vrfy_getpageinfo(vdp, i, &pip)) != 0)
goto err1;
@@ -936,6 +939,8 @@ err: if (h != NULL && (t_ret = __memp_f
return (ret == 0 ? t_ret : ret);
}
+ if (ret == DB_PAGE_NOTFOUND && isbad == 1)
+ ret = 0;
return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret);
}
@@ -1567,7 +1572,7 @@ __db_vrfy_meta(dbp, vdp, meta, pgno, fla
if (pgno == PGNO_BASE_MD &&
dbtype != DB_QUEUE && meta->last_pgno != vdp->last_pgno) {
#ifdef HAVE_FTRUNCATE
- isbad = 1;
+ ret = DB_VERIFY_FATAL;
EPRINT((env, DB_STR_A("0552",
"Page %lu: last_pgno is not correct: %lu != %lu",
"%lu %lu %lu"), (u_long)pgno,
@@ -1608,7 +1613,11 @@ __db_vrfy_freelist(dbp, vdp, meta, flags
env = dbp->env;
pgset = vdp->pgset;
- DB_ASSERT(env, pgset != NULL);
+ if (pgset == NULL) {
+ EPRINT((env, DB_STR("5543",
+ "Error, database contains no visible pages.")));
+ return (DB_RUNRECOVERY);
+ }
if ((ret = __db_vrfy_getpageinfo(vdp, meta, &pip)) != 0)
return (ret);
@@ -1993,7 +2002,8 @@ __db_salvage_pg(dbp, vdp, pgno, h, handl
int keyflag, ret, t_ret;
env = dbp->env;
- DB_ASSERT(env, LF_ISSET(DB_SALVAGE));
+ if (!LF_ISSET(DB_SALVAGE))
+ return (EINVAL);
/*
* !!!
@@ -2126,10 +2136,8 @@ __db_salvage_leaf(dbp, vdp, pgno, h, han
int (*callback) __P((void *, const void *));
u_int32_t flags;
{
- ENV *env;
-
- env = dbp->env;
- DB_ASSERT(env, LF_ISSET(DB_SALVAGE));
+ if (!LF_ISSET(DB_SALVAGE))
+ return (EINVAL);
/* If we got this page in the subdb pass, we can safely skip it. */
if (__db_salvage_isdone(vdp, pgno))
@@ -2223,8 +2231,8 @@ __db_salvage_unknowns(dbp, vdp, handle,
ret = t_ret;
break;
case SALVAGE_OVERFLOW:
- DB_ASSERT(env, 0); /* Shouldn't ever happen. */
- break;
+ EPRINT((env, DB_STR("5544", "Invalid page type to salvage.")));
+ return (EINVAL);
case SALVAGE_HASH:
if ((t_ret = __ham_salvage(dbp, vdp,
pgno, h, handle, callback, flags)) != 0 && ret == 0)
@@ -2237,8 +2245,8 @@ __db_salvage_unknowns(dbp, vdp, handle,
* Shouldn't happen, but if it does, just do what the
* nice man says.
*/
- DB_ASSERT(env, 0);
- break;
+ EPRINT((env, DB_STR("5545", "Invalid page type to salvage.")));
+ return (EINVAL);
}
if ((t_ret = __memp_fput(mpf,
vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
@@ -2284,8 +2292,8 @@ __db_salvage_unknowns(dbp, vdp, handle,
ret = t_ret;
break;
default:
- DB_ASSERT(env, 0); /* Shouldn't ever happen. */
- break;
+ EPRINT((env, DB_STR("5546", "Invalid page type to salvage.")));
+ return (EINVAL);
}
if ((t_ret = __memp_fput(mpf,
vdp->thread_info, h, dbp->priority)) != 0 && ret == 0)
@@ -2342,7 +2350,10 @@ __db_vrfy_inpitem(dbp, h, pgno, i, is_bt
env = dbp->env;
- DB_ASSERT(env, himarkp != NULL);
+ if (himarkp == NULL) {
+ __db_msg(env, "Page %lu index has no end.", (u_long)pgno);
+ return (DB_VERIFY_FATAL);
+ }
inp = P_INP(dbp, h);
/*
@@ -2755,7 +2766,11 @@ __db_salvage_subdbpg(dbp, vdp, master, h
goto err;
ovfl_bufsz = bkkey->len + 1;
}
- DB_ASSERT(env, subdbname != NULL);
+ if (subdbname == NULL) {
+ EPRINT((env, DB_STR("5547", "Subdatabase cannot be null.")));
+ ret = EINVAL;
+ goto err;
+ }
memcpy(subdbname, bkkey->data, bkkey->len);
subdbname[bkkey->len] = '\0';
}
--- a/src/db/db_vrfyutil.c
+++ b/src/db/db_vrfyutil.c
@@ -208,7 +208,8 @@ __db_vrfy_getpageinfo(vdp, pgno, pipp)
if ((ret = __db_get(pgdbp,
vdp->thread_info, vdp->txn, &key, &data, 0)) == 0) {
/* Found it. */
- DB_ASSERT(env, data.size == sizeof(VRFY_PAGEINFO));
+ if (data.size != sizeof(VRFY_PAGEINFO))
+ return (DB_VERIFY_FATAL);
pip = data.data;
LIST_INSERT_HEAD(&vdp->activepips, pip, links);
goto found;
@@ -336,7 +337,8 @@ __db_vrfy_pgset_get(dbp, ip, txn, pgno,
F_SET(&data, DB_DBT_USERMEM);
if ((ret = __db_get(dbp, ip, txn, &key, &data, 0)) == 0) {
- DB_ASSERT(dbp->env, data.size == sizeof(int));
+ if (data.size != sizeof(int))
+ return (EINVAL);
} else if (ret == DB_NOTFOUND)
val = 0;
else
@@ -376,7 +378,8 @@ __db_vrfy_pgset_inc(dbp, ip, txn, pgno)
F_SET(&data, DB_DBT_USERMEM);
if ((ret = __db_get(dbp, ip, txn, &key, &data, 0)) == 0) {
- DB_ASSERT(dbp->env, data.size == sizeof(int));
+ if (data.size != sizeof(int))
+ return (DB_VERIFY_FATAL);
} else if (ret != DB_NOTFOUND)
return (ret);
@@ -413,7 +416,8 @@ __db_vrfy_pgset_next(dbc, pgnop)
if ((ret = __dbc_get(dbc, &key, &data, DB_NEXT)) != 0)
return (ret);
- DB_ASSERT(dbc->env, key.size == sizeof(db_pgno_t));
+ if (key.size != sizeof(db_pgno_t))
+ return (DB_VERIFY_FATAL);
*pgnop = pgno;
return (0);
@@ -560,7 +564,8 @@ __db_vrfy_ccset(dbc, pgno, cipp)
if ((ret = __dbc_get(dbc, &key, &data, DB_SET)) != 0)
return (ret);
- DB_ASSERT(dbc->env, data.size == sizeof(VRFY_CHILDINFO));
+ if (data.size != sizeof(VRFY_CHILDINFO))
+ return (DB_VERIFY_FATAL);
*cipp = (VRFY_CHILDINFO *)data.data;
return (0);
@@ -588,7 +593,8 @@ __db_vrfy_ccnext(dbc, cipp)
if ((ret = __dbc_get(dbc, &key, &data, DB_NEXT_DUP)) != 0)
return (ret);
- DB_ASSERT(dbc->env, data.size == sizeof(VRFY_CHILDINFO));
+ if (data.size != sizeof(VRFY_CHILDINFO))
+ return (DB_VERIFY_FATAL);
*cipp = (VRFY_CHILDINFO *)data.data;
return (0);
@@ -715,7 +721,8 @@ __db_salvage_getnext(vdp, dbcp, pgnop, p
return (ret);
while ((ret = __dbc_get(*dbcp, &key, &data, DB_NEXT)) == 0) {
- DB_ASSERT(dbp->env, data.size == sizeof(u_int32_t));
+ if (data.size != sizeof(u_int32_t))
+ return (DB_VERIFY_FATAL);
memcpy(&pgtype, data.data, sizeof(pgtype));
if (skip_overflow && pgtype == SALVAGE_OVERFLOW)
@@ -724,8 +731,9 @@ __db_salvage_getnext(vdp, dbcp, pgnop, p
if ((ret = __dbc_del(*dbcp, 0)) != 0)
return (ret);
if (pgtype != SALVAGE_IGNORE) {
- DB_ASSERT(dbp->env, key.size == sizeof(db_pgno_t));
- DB_ASSERT(dbp->env, data.size == sizeof(u_int32_t));
+ if (key.size != sizeof(db_pgno_t)
+ || data.size != sizeof(u_int32_t))
+ return (DB_VERIFY_FATAL);
*pgnop = *(db_pgno_t *)key.data;
*pgtypep = *(u_int32_t *)data.data;
--- a/src/db/partition.c
+++ b/src/db/partition.c
@@ -461,9 +461,19 @@ __partition_chk_meta(dbp, ip, txn, flags
} else
part->nparts = meta->nparts;
} else if (meta->nparts != 0 && part->nparts != meta->nparts) {
+ ret = EINVAL;
__db_errx(env, DB_STR("0656",
"Number of partitions does not match."));
+ goto err;
+ }
+ /*
+ * There is no limit on the number of partitions, but I cannot imagine a real
+ * database having more than 10000.
+ */
+ if (meta->nparts > 10000) {
ret = EINVAL;
+ __db_errx(env, DB_STR_A("5553",
+ "Too many partitions %lu", "%lu"), (u_long)(meta->nparts));
goto err;
}
@@ -1874,10 +1884,13 @@ __part_verify(dbp, vdp, fname, handle, c
memcpy(rp->data, key->data, key->size);
B_TSET(rp->type, B_KEYDATA);
}
-vrfy: if ((t_ret = __db_verify(*pdbp, ip, (*pdbp)->fname,
- NULL, handle, callback,
- lp, rp, flags | DB_VERIFY_PARTITION)) != 0 && ret == 0)
- ret = t_ret;
+vrfy: if ((t_ret = __db_verify(*pdbp, ip, (*pdbp)->fname,
+ NULL, handle, callback,
+ lp, rp, flags | DB_VERIFY_PARTITION)) != 0 && ret == 0) {
+ ret = t_ret;
+ if (ret == ENOENT)
+ break;
+ }
}
err: if (lp != NULL)
--- a/src/hash/hash_page.c
+++ b/src/hash/hash_page.c
@@ -865,7 +865,11 @@ __ham_verify_sorted_page (dbc, p)
/* Validate that next, prev pointers are OK */
n = NUM_ENT(p);
dbp = dbc->dbp;
- DB_ASSERT(dbp->env, n%2 == 0 );
+ if (n % 2 != 0) {
+ __db_errx(dbp->env, DB_STR_A("5549",
+ "Odd number of entries on page: %lu", "%lu"), (u_long)(p->pgno));
+ return (DB_VERIFY_FATAL);
+ }
env = dbp->env;
t = dbp->h_internal;
@@ -936,7 +940,12 @@ __ham_verify_sorted_page (dbc, p)
if ((ret = __db_prpage(dbp, p, DB_PR_PAGE)) != 0)
return (ret);
#endif
- DB_ASSERT(dbp->env, res < 0);
+ if (res >= 0) {
+ __db_errx(env, DB_STR_A("5550",
+ "Odd number of entries on page: %lu", "%lu"),
+ (u_long)p->pgno);
+ return (DB_VERIFY_FATAL);
+ }
}
prev = curr;
--- a/src/hash/hash_verify.c
+++ b/src/hash/hash_verify.c
@@ -443,7 +443,7 @@ __ham_vrfy_structure(dbp, vdp, meta_pgno
isbad = 1;
else
goto err;
- }
+ }
/*
* There may be unused hash pages corresponding to buckets
@@ -574,7 +574,7 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, f
"Page %lu: impossible first page in bucket %lu", "%lu %lu"),
(u_long)pgno, (u_long)bucket));
/* Unsafe to continue. */
- isbad = 1;
+ ret = DB_VERIFY_FATAL;
goto err;
}
@@ -604,7 +604,7 @@ __ham_vrfy_bucket(dbp, vdp, m, bucket, f
EPRINT((env, DB_STR_A("1116",
"Page %lu: hash page referenced twice", "%lu"),
(u_long)pgno));
- isbad = 1;
+ ret = DB_VERIFY_FATAL;
/* Unsafe to continue. */
goto err;
} else if ((ret = __db_vrfy_pgset_inc(vdp->pgset,
@@ -1049,7 +1049,11 @@ __ham_meta2pgset(dbp, vdp, hmeta, flags,
COMPQUIET(flags, 0);
ip = vdp->thread_info;
- DB_ASSERT(dbp->env, pgset != NULL);
+ if (pgset == NULL) {
+ EPRINT((dbp->env, DB_STR("5548",
+ "Error, database contains no visible pages.")));
+ return (DB_VERIFY_FATAL);
+ }
mpf = dbp->mpf;
totpgs = 0;
--- a/src/qam/qam_verify.c
+++ b/src/qam/qam_verify.c
@@ -465,7 +465,14 @@ __qam_vrfy_walkqueue(dbp, vdp, handle, c
/* Verify/salvage each page. */
if ((ret = __db_cursor(dbp, vdp->thread_info, NULL, &dbc, 0)) != 0)
return (ret);
-begin: for (; i <= stop; i++) {
+begin: if ((stop - i) > 100000) {
+ EPRINT((env, DB_STR_A("5551",
+"Warning, many possible extends files (%lu), will take a long time to verify",
+ "%lu"), (u_long)(stop - i)));
+ }
+ for (; i <= stop; i++) {
+ if (i == UINT32_MAX)
+ break;
/*
* If DB_SALVAGE is set, we inspect our database of completed
* pages, and skip any we've already printed in the subdb pass.
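
Across this patch the byteswap fixes follow one rule: validate every page-relative offset (inp[i]) against the page end before dereferencing it, and treat a violation as page-format corruption instead of silently skipping the entry. A tiny standalone sketch (illustrative) of the guarded 16-bit swap:

#include <stdint.h>
#include <stdio.h>

#define M_16_SWAP(x) ((x) = (uint16_t)(((x) << 8) | ((x) >> 8)))

int main(void)
{
	uint8_t page[32] = { 0 };		/* stand-in page buffer */
	uint16_t *inp = (uint16_t *)page;	/* index array at page start */

	inp[0] = 0x0412;
	M_16_SWAP(inp[0]);			/* byte order flips: 0x1204 */
	/* The patched code rejects the page unless the offset stays inside it. */
	if (inp[0] >= sizeof(page))
		puts("page format error");
	else
		printf("entry at offset %u\n", (unsigned)inp[0]);
	return 0;
}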


@@ -0,0 +1,67 @@
Description: Enhance the rtreenode function in order to avoid a heap out-of-bounds read
Origin: https://www.sqlite.org/src/info/90acdbfce9c08858
Bug-Debian: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=929775
--- a/lang/sql/sqlite/ext/rtree/rtree.c
+++ b/lang/sql/sqlite/ext/rtree/rtree.c
@@ -3089,38 +3089,45 @@ static void rtreenode(sqlite3_context *c
RtreeNode node;
Rtree tree;
int ii;
+ int nData;
+ int errCode;
+ sqlite3_str *pOut;
UNUSED_PARAMETER(nArg);
memset(&node, 0, sizeof(RtreeNode));
memset(&tree, 0, sizeof(Rtree));
tree.nDim = sqlite3_value_int(apArg[0]);
+ if( tree.nDim<1 || tree.nDim>5 ) return;
tree.nBytesPerCell = 8 + 8 * tree.nDim;
node.zData = (u8 *)sqlite3_value_blob(apArg[1]);
+ nData = sqlite3_value_bytes(apArg[1]);
+ if( nData<4 ) return;
+ if( nData<NCELL(&node)*tree.nBytesPerCell ) return;
+ pOut = sqlite3_str_new(0);
for(ii=0; ii<NCELL(&node); ii++){
- char zCell[512];
- int nCell = 0;
RtreeCell cell;
int jj;
nodeGetCell(&tree, &node, ii, &cell);
- sqlite3_snprintf(512-nCell,&zCell[nCell],"%lld", cell.iRowid);
- nCell = strlen(zCell);
+ if( ii>0 ) sqlite3_str_append(pOut, " ", 1);
+ sqlite3_str_appendf(pOut, "{%lld", cell.iRowid);
for(jj=0; jj<tree.nDim*2; jj++){
- sqlite3_snprintf(512-nCell,&zCell[nCell]," %f",(double)cell.aCoord[jj].f);
- nCell = strlen(zCell);
+#ifndef SQLITE_RTREE_INT_ONLY
+ sqlite3_str_appendf(pOut, " %g", (double)cell.aCoord[jj].f);
+#else
+ sqlite3_str_appendf(pOut, " %d", cell.aCoord[jj].i);
+#endif
}
- if( zText ){
- char *zTextNew = sqlite3_mprintf("%s {%s}", zText, zCell);
- sqlite3_free(zText);
- zText = zTextNew;
- }else{
- zText = sqlite3_mprintf("{%s}", zCell);
- }
+
+ sqlite3_str_append(pOut, "}", 1);
+
}
-
- sqlite3_result_text(ctx, zText, -1, sqlite3_free);
+ errCode = sqlite3_str_errcode(pOut);
+ sqlite3_result_text(ctx, sqlite3_str_finish(pOut), -1, sqlite3_free);
+ sqlite3_result_error_code(ctx, errCode);
+
}
static void rtreedepth(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){
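
The rewrite drops the fixed 512-byte zCell buffer in favor of SQLite's growable string builder, which cannot be overflowed and reports allocation failure through sqlite3_str_errcode(). A self-contained sketch of that API (requires SQLite 3.24 or newer; the values are made up):

#include <stdio.h>
#include <sqlite3.h>

int main(void)
{
	sqlite3_str *p = sqlite3_str_new(0);  /* NULL db: default limits */

	sqlite3_str_appendf(p, "{%lld", (sqlite3_int64)42);
	sqlite3_str_appendf(p, " %g", 1.5);   /* buffer grows as needed */
	sqlite3_str_append(p, "}", 1);

	int rc = sqlite3_str_errcode(p);      /* SQLITE_NOMEM on failure */
	char *z = sqlite3_str_finish(p);      /* caller owns the result */
	if (rc == SQLITE_OK && z != NULL)
		printf("%s\n", z);            /* prints: {42 1.5} */
	sqlite3_free(z);
	return rc;
}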

View File

@@ -0,0 +1,22 @@
Description: CVE-2017-10140: Reads DB_CONFIG from the current working directory
Do not access DB_CONFIG when db_home is not set.
Origin: vendor, https://src.fedoraproject.org/rpms/libdb/raw/8047fa8580659fcae740c25e91b490539b8453eb/f/db-5.3.28-cwd-db_config.patch
Bug-Debian: https://bugs.debian.org/872436
Bug-RedHat: https://bugzilla.redhat.com/show_bug.cgi?id=1464032
Bug-SuSE: https://bugzilla.novell.com/show_bug.cgi?id=1043886
Forwarded: no
Author: Petr Kubat <pkubat@redhat.com>
Reviewed-by: Salvatore Bonaccorso <carnil@debian.org>
Last-Update: 2017-08-17
--- a/src/env/env_open.c
+++ b/src/env/env_open.c
@@ -473,7 +473,7 @@ __env_config(dbenv, db_home, flagsp, mod
env->db_mode = mode == 0 ? DB_MODE_660 : mode;
/* Read the DB_CONFIG file. */
- if ((ret = __env_read_db_config(env)) != 0)
+ if (env->db_home != NULL && (ret = __env_read_db_config(env)) != 0)
return (ret);
/*
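
For context on the CVE: before this change, an application that opened an environment with a NULL home directory still parsed DB_CONFIG from the current working directory, so an attacker who controls the cwd could inject environment settings. A hedged sketch of such an open using the standard DB_ENV API (error handling trimmed):

#include <db.h>

int open_env_without_home(DB_ENV **outp)
{
	DB_ENV *dbenv;
	int ret;

	if ((ret = db_env_create(&dbenv, 0)) != 0)
		return (ret);
	/* db_home is NULL: pre-fix builds would still read ./DB_CONFIG. */
	if ((ret = dbenv->open(dbenv, NULL,
	    DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE, 0)) != 0) {
		(void)dbenv->close(dbenv, 0);
		return (ret);
	}
	*outp = dbenv;
	return (0);
}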

View File

@@ -0,0 +1,34 @@
From: Andy Whitcroft <apw@canonical.com>
Subject: [PATCH] MMAP_EXTEND mode requires we extend in full system page increments
Date: Wed, 12 Mar 2014 11:58:31 +0100
When extending a mmap'd file we must ensure we extend it by full system
pages; otherwise there is a risk (when the filesystem page size is
smaller than the system page size) that no disk extents will be
allocated to back the memory, and the data will be lost.
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
---
env_file.c | 9 +++++++++
1 file changed, 9 insertions(+)
--- a/src/env/env_file.c
+++ b/src/env/env_file.c
@@ -28,6 +28,15 @@ __db_file_extend(env, fhp, size)
int ret;
char buf;
+#ifdef HAVE_MMAP_EXTEND
+ /*
+ * We have to ensure we extend a mmap'd segment a full memory page at
+ * a time or risk the end of the page not having any filesystem blocks
+ * associated with it, resulting in data loss.
+ */
+ size = DB_ALIGN(size, getpagesize()) - 1;
+#endif
+
buf = '\0';
/*
* Extend the file by writing the last page. If the region is >4Gb,
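
DB_ALIGN rounds a value up to a power-of-two boundary, so the patched line first rounds the requested size up to the system page size (the trailing - 1 presumably accounts for how __db_file_extend then positions its one-byte write). A self-contained sketch of the rounding, with a local macro mirroring BDB's DB_ALIGN:

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

/* Local mirror of BDB's DB_ALIGN: round v up to a power-of-two bound. */
#define ALIGN_UP(v, bound) \
	((((uintmax_t)(v)) + (bound) - 1) & ~(((uintmax_t)(bound)) - 1))

int main(void)
{
	uintmax_t want = 10000;               /* requested extension */
	long pagesz = sysconf(_SC_PAGESIZE);  /* commonly 4096 */

	/* 10000 rounds up to 12288 with 4096-byte pages. */
	printf("extend %ju -> %ju\n", want, ALIGN_UP(want, (uintmax_t)pagesz));
	return (0);
}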

View File

@@ -0,0 +1,15 @@
--- a/src/dbinc/mutex_int.h
+++ b/src/dbinc/mutex_int.h
@@ -850,7 +850,11 @@ typedef volatile unsigned char tsl_t;
* alignment locally.
*/
#ifndef MUTEX_ALIGN
-#define MUTEX_ALIGN sizeof(unsigned int)
+# if defined(__linux__) && defined(__sparc__)
+# define MUTEX_ALIGN 8
+# else
+# define MUTEX_ALIGN sizeof(unsigned int)
+# endif
#endif
/*

View File

@@ -0,0 +1,29 @@
--- a/src/dbinc/db_page.h
+++ b/src/dbinc/db_page.h
@@ -256,6 +256,17 @@ typedef struct __pg_crypto {
*/
} PG_CRYPTO;
+/*
+ * With most compilers sizeof(PG_CRYPTO) == 38. However some ABIs
+ * require it to be padded to 40 bytes. The padding must be excluded
+ * from our size calculations due to the 16-byte alignment requirement
+ * for crypto.
+ *
+ * A similar problem applies to PG_CHKSUM, but it's too late to change
+ * that.
+ */
+#define SIZEOF_PG_CRYPTO 38
+
typedef struct _db_page {
DB_LSN lsn; /* 00-07: Log sequence number. */
db_pgno_t pgno; /* 08-11: Current page number. */
@@ -291,7 +302,7 @@ typedef struct _db_page {
*/
#define P_INP(dbp, pg) \
((db_indx_t *)((u_int8_t *)(pg) + SIZEOF_PAGE + \
- (F_ISSET((dbp), DB_AM_ENCRYPT) ? sizeof(PG_CRYPTO) : \
+ (F_ISSET((dbp), DB_AM_ENCRYPT) ? SIZEOF_PG_CRYPTO : \
(F_ISSET((dbp), DB_AM_CHKSUM) ? sizeof(PG_CHKSUM) : 0))))
#define P_IV(dbp, pg) \

View File

@@ -1,55 +0,0 @@
--- a/sequence/sequence.c
+++ b/sequence/sequence.c
@@ -187,7 +187,11 @@ __seq_open_pp(seq, txn, keyp, flags)
if ((ret = __db_get_flags(dbp, &tflags)) != 0)
goto err;
- if (DB_IS_READONLY(dbp)) {
+ /*
+ * We can let replication clients open sequences, but must
+ * check later that they do not update them.
+ */
+ if (F_ISSET(dbp, DB_AM_RDONLY)) {
ret = __db_rdonly(dbp->env, "DB_SEQUENCE->open");
goto err;
}
@@ -244,6 +248,11 @@ retry: if ((ret = __db_get(dbp, ip,
if ((ret != DB_NOTFOUND && ret != DB_KEYEMPTY) ||
!LF_ISSET(DB_CREATE))
goto err;
+ if (IS_REP_CLIENT(env) &&
+ !F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
+ ret = __db_rdonly(env, "DB_SEQUENCE->open");
+ goto err;
+ }
ret = 0;
rp = &seq->seq_record;
@@ -296,7 +305,12 @@ retry: if ((ret = __db_get(dbp, ip,
*/
rp = seq->seq_data.data;
if (rp->seq_version == DB_SEQUENCE_OLDVER) {
-oldver: rp->seq_version = DB_SEQUENCE_VERSION;
+oldver: if (IS_REP_CLIENT(env) &&
+ !F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
+ ret = __db_rdonly(env, "DB_SEQUENCE->open");
+ goto err;
+ }
+ rp->seq_version = DB_SEQUENCE_VERSION;
if (!F_ISSET(env, ENV_LITTLEENDIAN)) {
if (IS_DB_AUTO_COMMIT(dbp, txn)) {
if ((ret =
@@ -707,6 +721,13 @@ __seq_get(seq, txn, delta, retp, flags)
MUTEX_LOCK(env, seq->mtx_seq);
+ if (handle_check && IS_REP_CLIENT(env) &&
+ !F_ISSET(dbp, DB_AM_NOT_DURABLE)) {
+ ret = __db_rdonly(env, "DB_SEQUENCE->get");
+ goto err;
+ }
+
+
if (rp->seq_min + delta > rp->seq_max) {
__db_errx(env, "Sequence overflow");
ret = EINVAL;

View File

@@ -1,42 +0,0 @@
--- a/lock/lock.c
+++ b/lock/lock.c
@@ -1274,10 +1274,12 @@ __lock_put_internal(lt, lockp, obj_ndx,
SH_TAILQ_REMOVE(
&lt->obj_tab[obj_ndx], sh_obj, links, __db_lockobj);
if (sh_obj->lockobj.size > sizeof(sh_obj->objdata)) {
- LOCK_REGION_LOCK(env);
+ if (region->part_t_size != 1)
+ LOCK_REGION_LOCK(env);
__env_alloc_free(&lt->reginfo,
SH_DBT_PTR(&sh_obj->lockobj));
- LOCK_REGION_UNLOCK(env);
+ if (region->part_t_size != 1)
+ LOCK_REGION_UNLOCK(env);
}
SH_TAILQ_INSERT_HEAD(
&FREE_OBJS(lt, part_id), sh_obj, links, __db_lockobj);
@@ -1467,15 +1469,21 @@ retry: SH_TAILQ_FOREACH(sh_obj, &lt->obj
if (obj->size <= sizeof(sh_obj->objdata))
p = sh_obj->objdata;
else {
- LOCK_REGION_LOCK(env);
+ /*
+ * If we have only one partition, the region is locked.
+ */
+ if (region->part_t_size != 1)
+ LOCK_REGION_LOCK(env);
if ((ret =
__env_alloc(&lt->reginfo, obj->size, &p)) != 0) {
__db_errx(env,
"No space for lock object storage");
- LOCK_REGION_UNLOCK(env);
+ if (region->part_t_size != 1)
+ LOCK_REGION_UNLOCK(env);
goto err;
}
- LOCK_REGION_UNLOCK(env);
+ if (region->part_t_size != 1)
+ LOCK_REGION_UNLOCK(env);
}
memcpy(p, obj->data, obj->size);

View File

@@ -1,211 +0,0 @@
--- a/lock/lock_deadlock.c
+++ b/lock/lock_deadlock.c
@@ -121,7 +121,7 @@ __lock_detect(env, atype, rejectp)
DB_LOCKTAB *lt;
db_timespec now;
locker_info *idmap;
- u_int32_t *bitmap, *copymap, **deadp, **free_me, *tmpmap;
+ u_int32_t *bitmap, *copymap, **deadp, **deadlist, *tmpmap;
u_int32_t i, cid, keeper, killid, limit, nalloc, nlockers;
u_int32_t lock_max, txn_max;
int ret, status;
@@ -133,7 +133,8 @@ __lock_detect(env, atype, rejectp)
if (IS_REP_CLIENT(env))
atype = DB_LOCK_MINWRITE;
- free_me = NULL;
+ copymap = tmpmap = NULL;
+ deadlist = NULL;
lt = env->lk_handle;
if (rejectp != NULL)
@@ -179,11 +180,11 @@ __lock_detect(env, atype, rejectp)
memcpy(copymap, bitmap, nlockers * sizeof(u_int32_t) * nalloc);
if ((ret = __os_calloc(env, sizeof(u_int32_t), nalloc, &tmpmap)) != 0)
- goto err1;
+ goto err;
/* Find a deadlock. */
if ((ret =
- __dd_find(env, bitmap, idmap, nlockers, nalloc, &deadp)) != 0)
+ __dd_find(env, bitmap, idmap, nlockers, nalloc, &deadlist)) != 0)
return (ret);
/*
@@ -204,8 +205,7 @@ __lock_detect(env, atype, rejectp)
txn_max = TXN_MAXIMUM;
killid = BAD_KILLID;
- free_me = deadp;
- for (; *deadp != NULL; deadp++) {
+ for (deadp = deadlist; *deadp != NULL; deadp++) {
if (rejectp != NULL)
++*rejectp;
killid = (u_int32_t)(*deadp - bitmap) / nalloc;
@@ -342,11 +342,12 @@ dokill: if (killid == BAD_KILLID) {
__db_msg(env,
"Aborting locker %lx", (u_long)idmap[killid].id);
}
- __os_free(env, tmpmap);
-err1: __os_free(env, copymap);
-
-err: if (free_me != NULL)
- __os_free(env, free_me);
+err: if(copymap != NULL)
+ __os_free(env, copymap);
+ if (deadlist != NULL)
+ __os_free(env, deadlist);
+ if(tmpmap != NULL)
+ __os_free(env, tmpmap);
__os_free(env, bitmap);
__os_free(env, idmap);
@@ -360,6 +361,17 @@ err: if (free_me != NULL)
#define DD_INVALID_ID ((u_int32_t) -1)
+/*
+ * __dd_build --
+ * Build the lock dependency bit maps.
+ * Notes on synchronization:
+ * LOCK_SYSTEM_LOCK is used to hold objects locked when we have
+ * a single partition.
+ * LOCK_LOCKERS is held while we are walking the lockers list and
+ * to single thread the use of lockerp->dd_id.
+ * LOCK_DD protects the DD list of objects.
+ */
+
static int
__dd_build(env, atype, bmp, nlockers, allocp, idmap, rejectp)
ENV *env;
@@ -393,6 +405,7 @@ __dd_build(env, atype, bmp, nlockers, al
* In particular we do not build the conflict array and our caller
* needs to expect this.
*/
+ LOCK_SYSTEM_LOCK(lt, region);
if (atype == DB_LOCK_EXPIRE) {
skip: LOCK_DD(env, region);
op = SH_TAILQ_FIRST(&region->dd_objs, __db_lockobj);
@@ -430,17 +443,18 @@ skip: LOCK_DD(env, region);
OBJECT_UNLOCK(lt, region, indx);
}
UNLOCK_DD(env, region);
+ LOCK_SYSTEM_UNLOCK(lt, region);
goto done;
}
/*
- * We'll check how many lockers there are, add a few more in for
- * good measure and then allocate all the structures. Then we'll
- * verify that we have enough room when we go back in and get the
- * mutex the second time.
+ * Allocate after locking the region
+ * to make sure the structures are large enough.
*/
-retry: count = region->stat.st_nlockers;
+ LOCK_LOCKERS(env, region);
+ count = region->stat.st_nlockers;
if (count == 0) {
+ UNLOCK_LOCKERS(env, region);
*nlockers = 0;
return (0);
}
@@ -448,50 +462,37 @@ retry: count = region->stat.st_nlockers;
if (FLD_ISSET(env->dbenv->verbose, DB_VERB_DEADLOCK))
__db_msg(env, "%lu lockers", (u_long)count);
- count += 20;
nentries = (u_int32_t)DB_ALIGN(count, 32) / 32;
- /*
- * Allocate enough space for a count by count bitmap matrix.
- *
- * XXX
- * We can probably save the malloc's between iterations just
- * reallocing if necessary because count grew by too much.
- */
+ /* Allocate enough space for a count by count bitmap matrix. */
if ((ret = __os_calloc(env, (size_t)count,
- sizeof(u_int32_t) * nentries, &bitmap)) != 0)
+ sizeof(u_int32_t) * nentries, &bitmap)) != 0) {
+ UNLOCK_LOCKERS(env, region);
return (ret);
+ }
if ((ret = __os_calloc(env,
sizeof(u_int32_t), nentries, &tmpmap)) != 0) {
+ UNLOCK_LOCKERS(env, region);
__os_free(env, bitmap);
return (ret);
}
if ((ret = __os_calloc(env,
(size_t)count, sizeof(locker_info), &id_array)) != 0) {
+ UNLOCK_LOCKERS(env, region);
__os_free(env, bitmap);
__os_free(env, tmpmap);
return (ret);
}
/*
- * Now go back in and actually fill in the matrix.
- */
- if (region->stat.st_nlockers > count) {
- __os_free(env, bitmap);
- __os_free(env, tmpmap);
- __os_free(env, id_array);
- goto retry;
- }
-
- /*
* First we go through and assign each locker a deadlock detector id.
*/
id = 0;
- LOCK_LOCKERS(env, region);
SH_TAILQ_FOREACH(lip, &region->lockers, ulinks, __db_locker) {
if (lip->master_locker == INVALID_ROFF) {
+ DB_ASSERT(env, id < count);
lip->dd_id = id++;
id_array[lip->dd_id].id = lip->id;
switch (atype) {
@@ -510,7 +511,6 @@ retry: count = region->stat.st_nlockers;
lip->dd_id = DD_INVALID_ID;
}
- UNLOCK_LOCKERS(env, region);
/*
* We only need consider objects that have waiters, so we use
@@ -669,7 +669,6 @@ again: memset(bitmap, 0, count * sizeof
* status after building the bit maps so that we will not detect
* a blocked transaction without noting that it is already aborting.
*/
- LOCK_LOCKERS(env, region);
for (id = 0; id < count; id++) {
if (!id_array[id].valid)
continue;
@@ -738,6 +737,7 @@ get_lock: id_array[id].last_lock = R_OF
id_array[id].in_abort = 1;
}
UNLOCK_LOCKERS(env, region);
+ LOCK_SYSTEM_UNLOCK(lt, region);
/*
* Now we can release everything except the bitmap matrix that we
@@ -839,6 +839,7 @@ __dd_abort(env, info, statusp)
ret = 0;
/* We must lock so this locker cannot go away while we abort it. */
+ LOCK_SYSTEM_LOCK(lt, region);
LOCK_LOCKERS(env, region);
/*
@@ -895,6 +896,7 @@ __dd_abort(env, info, statusp)
done: OBJECT_UNLOCK(lt, region, info->last_ndx);
err:
out: UNLOCK_LOCKERS(env, region);
+ LOCK_SYSTEM_UNLOCK(lt, region);
return (ret);
}

View File

@@ -1,118 +0,0 @@
--- a/dbinc/repmgr.h
+++ b/dbinc/repmgr.h
@@ -374,6 +374,7 @@ typedef struct {
#define SITE_FROM_EID(eid) (&db_rep->sites[eid])
#define EID_FROM_SITE(s) ((int)((s) - (&db_rep->sites[0])))
#define IS_VALID_EID(e) ((e) >= 0)
+#define IS_KNOWN_REMOTE_SITE(e) ((e) >= 0 && ((u_int)(e)) < db_rep->site_cnt)
#define SELF_EID INT_MAX
#define IS_PEER_POLICY(p) ((p) == DB_REPMGR_ACKS_ALL_PEERS || \
--- a/rep/rep_elect.c
+++ b/rep/rep_elect.c
@@ -33,7 +33,7 @@ static int __rep_elect_init
static int __rep_fire_elected __P((ENV *, REP *, u_int32_t));
static void __rep_elect_master __P((ENV *, REP *));
static int __rep_tally __P((ENV *, REP *, int, u_int32_t *, u_int32_t, roff_t));
-static int __rep_wait __P((ENV *, db_timeout_t *, int *, int, u_int32_t));
+static int __rep_wait __P((ENV *, db_timeout_t *, int, u_int32_t));
/*
* __rep_elect --
@@ -55,7 +55,7 @@ __rep_elect(dbenv, given_nsites, nvotes,
ENV *env;
LOG *lp;
REP *rep;
- int done, eid, elected, full_elect, locked, in_progress, need_req;
+ int done, elected, full_elect, locked, in_progress, need_req;
int ret, send_vote, t_ret;
u_int32_t ack, ctlflags, egen, nsites, orig_tally, priority, realpri;
u_int32_t tiebreaker;
@@ -181,8 +181,7 @@ __rep_elect(dbenv, given_nsites, nvotes,
REP_SYSTEM_UNLOCK(env);
(void)__rep_send_message(env, DB_EID_BROADCAST,
REP_MASTER_REQ, NULL, NULL, 0, 0);
- ret = __rep_wait(env, &to, &eid,
- 0, REP_F_EPHASE0);
+ ret = __rep_wait(env, &to, 0, REP_F_EPHASE0);
REP_SYSTEM_LOCK(env);
F_CLR(rep, REP_F_EPHASE0);
switch (ret) {
@@ -286,11 +285,11 @@ restart:
REP_SYSTEM_LOCK(env);
goto vote;
}
- ret = __rep_wait(env, &to, &eid, full_elect, REP_F_EPHASE1);
+ ret = __rep_wait(env, &to, full_elect, REP_F_EPHASE1);
switch (ret) {
case 0:
/* Check if election complete or phase complete. */
- if (eid != DB_EID_INVALID && !IN_ELECTION(rep)) {
+ if (!IN_ELECTION(rep)) {
RPRINT(env, DB_VERB_REP_ELECT,
(env, "Ended election phase 1"));
goto edone;
@@ -398,15 +397,12 @@ phase2:
REP_SYSTEM_LOCK(env);
goto i_won;
}
- ret = __rep_wait(env, &to, &eid, full_elect, REP_F_EPHASE2);
+ ret = __rep_wait(env, &to, full_elect, REP_F_EPHASE2);
RPRINT(env, DB_VERB_REP_ELECT,
(env, "Ended election phase 2 %d", ret));
switch (ret) {
case 0:
- if (eid != DB_EID_INVALID)
- goto edone;
- ret = DB_REP_UNAVAIL;
- break;
+ goto edone;
case DB_REP_EGENCHG:
if (to > timeout)
to = timeout;
@@ -1050,13 +1046,6 @@ __rep_elect_master(env, rep)
ENV *env;
REP *rep;
{
- /*
- * We often come through here twice, sometimes even more. We mustn't
- * let the redundant calls affect stats counting. But rep_elect relies
- * on this first part for setting eidp.
- */
- rep->master_id = rep->eid;
-
if (F_ISSET(rep, REP_F_MASTERELECT | REP_F_MASTER)) {
/* We've been through here already; avoid double counting. */
return;
@@ -1093,10 +1082,10 @@ __rep_fire_elected(env, rep, egen)
(timeout > 5000000) ? 500000 : ((timeout >= 10) ? timeout / 10 : 1);
static int
-__rep_wait(env, timeoutp, eidp, full_elect, flags)
+__rep_wait(env, timeoutp, full_elect, flags)
ENV *env;
db_timeout_t *timeoutp;
- int *eidp, full_elect;
+ int full_elect;
u_int32_t flags;
{
DB_REP *db_rep;
@@ -1174,7 +1163,6 @@ __rep_wait(env, timeoutp, eidp, full_ele
F_CLR(rep, REP_F_EGENUPDATE);
ret = DB_REP_EGENCHG;
} else if (phase_over) {
- *eidp = rep->master_id;
done = 1;
ret = 0;
}
--- a/repmgr/repmgr_net.c
+++ b/repmgr/repmgr_net.c
@@ -100,6 +100,8 @@ __repmgr_send(dbenv, control, rec, lsnp,
control, rec, &nsites_sent, &npeers_sent)) != 0)
goto out;
} else {
+ DB_ASSERT(env, IS_KNOWN_REMOTE_SITE(eid));
+
/*
* If this is a request that can be sent anywhere, then see if
* we can send it to our peer (to save load on the master), but

View File

@@ -1,11 +0,0 @@
--- a/repmgr/repmgr_net.c
+++ b/repmgr/repmgr_net.c
@@ -1136,7 +1136,7 @@ __repmgr_listen(env)
}
ret = net_errno;
- __db_err(env, ret, why);
+ __db_err(env, ret, "%s", why);
clean: if (s != INVALID_SOCKET)
(void)closesocket(s);
return (ret);